diff --git a/drivers/d3d12/rendering_shader_container_d3d12.cpp b/drivers/d3d12/rendering_shader_container_d3d12.cpp index 06d21aceb2f1..0a766b309c6c 100644 --- a/drivers/d3d12/rendering_shader_container_d3d12.cpp +++ b/drivers/d3d12/rendering_shader_container_d3d12.cpp @@ -266,7 +266,7 @@ uint32_t RenderingShaderContainerD3D12::_to_bytes_footer_extra_data(uint8_t *p_b } #if NIR_ENABLED -bool RenderingShaderContainerD3D12::_convert_spirv_to_nir(Span p_spirv, const nir_shader_compiler_options *p_compiler_options, HashMap &r_stages_nir_shaders, Vector &r_stages, BitField &r_stages_processed) { +bool RenderingShaderContainerD3D12::_convert_spirv_to_nir(Span p_spirv, const nir_shader_compiler_options *p_compiler_options, HashMap &r_stages_nir_shaders, Vector &r_stages, BitField &r_stages_processed) { r_stages_processed.clear(); dxil_spirv_runtime_conf dxil_runtime_conf = {}; @@ -428,7 +428,7 @@ bool RenderingShaderContainerD3D12::_convert_nir_to_dxil(const HashMap p_spirv, HashMap> &r_dxil_blobs, Vector &r_stages, BitField &r_stages_processed) { +bool RenderingShaderContainerD3D12::_convert_spirv_to_dxil(Span p_spirv, HashMap> &r_dxil_blobs, Vector &r_stages, BitField &r_stages_processed) { r_dxil_blobs.clear(); HashMap stages_nir_shaders; @@ -763,7 +763,7 @@ void RenderingShaderContainerD3D12::_nir_report_bitcode_bit_offset(uint64_t p_bi } #endif -void RenderingShaderContainerD3D12::_set_from_shader_reflection_post(const RenderingDeviceCommons::ShaderReflection &p_reflection) { +void RenderingShaderContainerD3D12::_set_from_shader_reflection_post(const ReflectShader &p_shader) { reflection_binding_set_uniforms_data_d3d12.resize(reflection_binding_set_uniforms_data.size()); reflection_specialization_data_d3d12.resize(reflection_specialization_data.size()); @@ -779,8 +779,9 @@ void RenderingShaderContainerD3D12::_set_from_shader_reflection_post(const Rende } } -bool RenderingShaderContainerD3D12::_set_code_from_spirv(Span p_spirv) { +bool RenderingShaderContainerD3D12::_set_code_from_spirv(const ReflectShader &p_shader) { #if NIR_ENABLED + const LocalVector &p_spirv = p_shader.shader_stages; reflection_data_d3d12.nir_runtime_data_root_param_idx = UINT32_MAX; for (int64_t i = 0; i < reflection_specialization_data.size(); i++) { diff --git a/drivers/d3d12/rendering_shader_container_d3d12.h b/drivers/d3d12/rendering_shader_container_d3d12.h index d5a1813d4734..f539f0c48f39 100644 --- a/drivers/d3d12/rendering_shader_container_d3d12.h +++ b/drivers/d3d12/rendering_shader_container_d3d12.h @@ -122,9 +122,9 @@ class RenderingShaderContainerD3D12 : public RenderingShaderContainer { uint32_t root_signature_crc = 0; #if NIR_ENABLED - bool _convert_spirv_to_nir(Span p_spirv, const nir_shader_compiler_options *p_compiler_options, HashMap &r_stages_nir_shaders, Vector &r_stages, BitField &r_stages_processed); + bool _convert_spirv_to_nir(Span p_spirv, const nir_shader_compiler_options *p_compiler_options, HashMap &r_stages_nir_shaders, Vector &r_stages, BitField &r_stages_processed); bool _convert_nir_to_dxil(const HashMap &p_stages_nir_shaders, BitField p_stages_processed, HashMap> &r_dxil_blobs); - bool _convert_spirv_to_dxil(Span p_spirv, HashMap> &r_dxil_blobs, Vector &r_stages, BitField &r_stages_processed); + bool _convert_spirv_to_dxil(Span p_spirv, HashMap> &r_dxil_blobs, Vector &r_stages, BitField &r_stages_processed); bool _generate_root_signature(BitField p_stages_processed); // GodotNirCallbacks. 
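Note: the conversion helpers declared above compose into `_convert_spirv_to_dxil()`, which first lowers each stage's SPIR-V to NIR and then compiles the NIR shaders to DXIL blobs. A minimal sketch of that flow, with hypothetical local names, and assuming per-stage `nir_shader *` / `Vector<uint8_t>` container values and a `BitField<ShaderStageUsage>` stage mask (the template arguments are not visible in this hunk):

    HashMap<ShaderStage, nir_shader *> nir_shaders;
    Vector<ShaderStage> stages;
    BitField<ShaderStageUsage> stages_processed;
    if (!_convert_spirv_to_nir(p_spirv, compiler_options, nir_shaders, stages, stages_processed)) {
        return false;
    }
    return _convert_nir_to_dxil(nir_shaders, stages_processed, r_dxil_blobs);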
@@ -146,8 +146,8 @@ class RenderingShaderContainerD3D12 : public RenderingShaderContainer { virtual uint32_t _to_bytes_reflection_binding_uniform_extra_data(uint8_t *p_bytes, uint32_t p_index) const override; virtual uint32_t _to_bytes_reflection_specialization_extra_data(uint8_t *p_bytes, uint32_t p_index) const override; virtual uint32_t _to_bytes_footer_extra_data(uint8_t *p_bytes) const override; - virtual void _set_from_shader_reflection_post(const RenderingDeviceCommons::ShaderReflection &p_reflection) override; - virtual bool _set_code_from_spirv(Span p_spirv) override; + virtual void _set_from_shader_reflection_post(const ReflectShader &p_shader) override; + virtual bool _set_code_from_spirv(const ReflectShader &p_shader) override; public: struct ShaderReflectionD3D12 { diff --git a/drivers/metal/SCsub b/drivers/metal/SCsub index 24016d38fe2c..3cf450c49e57 100644 --- a/drivers/metal/SCsub +++ b/drivers/metal/SCsub @@ -46,6 +46,7 @@ env_metal.Append(CCFLAGS=["-fmodules", "-fcxx-modules"]) driver_obj = [] env_metal.add_source_files(driver_obj, "*.mm") +env_metal.add_source_files(driver_obj, "*.cpp") env.drivers_sources += driver_obj # Needed to force rebuilding the driver files when the thirdparty library is updated. diff --git a/drivers/metal/metal_device_profile.cpp b/drivers/metal/metal_device_profile.cpp new file mode 100644 index 000000000000..d373dbd4651d --- /dev/null +++ b/drivers/metal/metal_device_profile.cpp @@ -0,0 +1,129 @@ +/**************************************************************************/ +/* metal_device_profile.cpp */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ +/**************************************************************************/ + +#include "metal_device_profile.h" + +#include "metal_utils.h" + +Mutex MetalDeviceProfile::profiles_lock; +HashMap<MetalDeviceProfile::ProfileKey, MetalDeviceProfile> MetalDeviceProfile::profiles; + +const MetalDeviceProfile *MetalDeviceProfile::get_profile(Platform p_platform, GPU p_gpu, MinOsVersion p_min_os_version) { + DEV_ASSERT(p_platform == Platform::macOS || p_platform == Platform::iOS || p_platform == Platform::visionOS); + + MutexLock lock(profiles_lock); + + ProfileKey key(p_min_os_version, p_platform, p_gpu); + if (MetalDeviceProfile *profile = profiles.getptr(key)) { + return profile; + } + + MetalDeviceProfile res; + res.platform = p_platform; + res.gpu = p_gpu; + res.min_os_version = p_min_os_version; + + switch (p_platform) { + case Platform::macOS: { + if (p_min_os_version >= os_version::MACOS_26_0) { + res.features.msl_version = MSL_VERSION_40; + } else if (p_min_os_version >= os_version::MACOS_15_0) { + res.features.msl_version = MSL_VERSION_32; + } else if (p_min_os_version >= os_version::MACOS_14_0) { + res.features.msl_version = MSL_VERSION_31; + } else if (p_min_os_version >= os_version::MACOS_13_0) { + res.features.msl_version = MSL_VERSION_30; + } else if (p_min_os_version >= os_version::MACOS_12_0) { + res.features.msl_version = MSL_VERSION_24; + } else { + res.features.msl_version = MSL_VERSION_23; + } + res.features.use_argument_buffers = p_min_os_version >= os_version::MACOS_13_0; + res.features.simdPermute = true; + } break; + + case Platform::iOS: { + if (p_min_os_version >= os_version::IOS_26_0) { + res.features.msl_version = MSL_VERSION_40; + } else if (p_min_os_version >= os_version::IOS_18_0) { + res.features.msl_version = MSL_VERSION_32; + } else if (p_min_os_version >= os_version::IOS_17_0) { + res.features.msl_version = MSL_VERSION_31; + } else if (p_min_os_version >= os_version::IOS_16_0) { + res.features.msl_version = MSL_VERSION_30; + } else if (p_min_os_version >= os_version::IOS_15_0) { + res.features.msl_version = MSL_VERSION_24; + } else { + res.features.msl_version = MSL_VERSION_23; + } + + switch (p_gpu) { + case GPU::Apple1: + case GPU::Apple2: + case GPU::Apple3: + case GPU::Apple4: + case GPU::Apple5: { + res.features.simdPermute = false; + res.features.use_argument_buffers = false; + } break; + case GPU::Apple6: + case GPU::Apple7: + case GPU::Apple8: + case GPU::Apple9: { + res.features.use_argument_buffers = p_min_os_version >= os_version::IOS_16_0; + res.features.simdPermute = true; + } break; + } + } break; + + case Platform::visionOS: { + if (p_min_os_version >= os_version::VISIONOS_26_0) { + res.features.msl_version = MSL_VERSION_40; + } else if (p_min_os_version >= os_version::VISIONOS_02_4) { + res.features.msl_version = MSL_VERSION_32; + } else { + ERR_FAIL_V_MSG(nullptr, "visionOS 2.4 is the minimum supported visionOS version."); + } + + switch (p_gpu) { + case GPU::Apple8: + case GPU::Apple9: { + res.features.use_argument_buffers = true; + res.features.simdPermute = true; + } break; + default: { + CRASH_NOW_MSG("visionOS hardware requires at least an Apple8 GPU."); + } + } + } break; + } + + return &profiles.insert(key, res)->value; +} diff --git a/drivers/metal/metal_device_profile.h b/drivers/metal/metal_device_profile.h new file mode 100644 index 000000000000..1521a582b3be --- /dev/null +++ b/drivers/metal/metal_device_profile.h @@ -0,0 +1,157 @@ +/**************************************************************************/ +/* metal_device_profile.h */
+/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +#pragma once + +#include "core/os/mutex.h" +#include "core/string/ustring.h" +#include "core/templates/hash_map.h" +#include "core/typedefs.h" + +class MinOsVersion { + uint32_t version; + +public: + String to_compiler_os_version() const; + bool is_null() const { return version == UINT32_MAX; } + bool is_valid() const { return version != UINT32_MAX; } + + MinOsVersion(const String &p_version); + constexpr explicit MinOsVersion(uint32_t p_version) : + version(p_version) {} + constexpr MinOsVersion(uint32_t p_major, uint32_t p_minor, uint32_t p_patch = 0) : + version(p_major * 10000 + p_minor * 100 + p_patch) {} + constexpr MinOsVersion() : + version(UINT32_MAX) {} + + bool operator>(uint32_t p_other) const { + return version > p_other; + } + constexpr operator uint32_t() const { return version; } +}; + +namespace os_version { + +constexpr MinOsVersion MACOS_26_0(26'00'00); +constexpr MinOsVersion MACOS_15_0(15'00'00); +constexpr MinOsVersion MACOS_14_0(14'00'00); +constexpr MinOsVersion MACOS_13_0(13'00'00); +constexpr MinOsVersion MACOS_12_0(12'00'00); +constexpr MinOsVersion MACOS_11_0(11'00'00); + +constexpr MinOsVersion IOS_26_0(26'00'00); +constexpr MinOsVersion IOS_18_0(18'00'00); +constexpr MinOsVersion IOS_17_0(17'00'00); +constexpr MinOsVersion IOS_16_0(16'00'00); +constexpr MinOsVersion IOS_15_0(15'00'00); + +constexpr MinOsVersion VISIONOS_26_0(26'00'00); +constexpr MinOsVersion VISIONOS_02_4(2'04'00); + +} //namespace os_version + +/// @brief A minimal structure that defines a device profile for Metal. +/// +/// This structure is used by the `RenderingShaderContainerMetal` class to +/// determine options for compiling SPIR-V to Metal source. It currently only +/// contains the minimum properties required to transform shaders from SPIR-V to Metal +/// and potentially compile to a `.metallib`.
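+///
+/// Illustrative usage (hypothetical values; `get_profile()` caches and returns
+/// one shared profile per unique (platform, GPU, min OS version) key):
+///
+///     const MetalDeviceProfile *profile = MetalDeviceProfile::get_profile(
+///             MetalDeviceProfile::Platform::macOS,
+///             MetalDeviceProfile::GPU::Apple8,
+///             os_version::MACOS_14_0);
+///     // profile->features.msl_version is MSL_VERSION_31 for this key.
+///
+/// `MinOsVersion` packs `major * 10000 + minor * 100 + patch`, so `MACOS_14_0`
+/// is the literal `14'00'00`, i.e. 140000.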
+struct MetalDeviceProfile { + enum class Platform : uint32_t { + macOS = 0, + iOS = 1, + visionOS = 2, + }; + + /*! @brief The GPU family. + * + * NOTE: These values match Apple's MTLGPUFamily + */ + enum class GPU : uint32_t { + Apple1 = 1001, + Apple2 = 1002, + Apple3 = 1003, + Apple4 = 1004, + Apple5 = 1005, + Apple6 = 1006, + Apple7 = 1007, + Apple8 = 1008, + Apple9 = 1009, + }; + + enum class ArgumentBuffersTier : uint32_t { + Tier1 = 0, + Tier2 = 1, + }; + + struct Features { + uint32_t msl_version = 0; + bool use_argument_buffers = false; + bool simdPermute = false; + }; + + Platform platform = Platform::macOS; + GPU gpu = GPU::Apple4; + MinOsVersion min_os_version; + Features features; + + static const MetalDeviceProfile *get_profile(Platform p_platform, GPU p_gpu, MinOsVersion p_min_os_version); + + MetalDeviceProfile() = default; + +private: + static Mutex profiles_lock; ///< Mutex to protect access to the profiles map. + + struct ProfileKey { + friend struct HashMapHasherDefaultImpl<ProfileKey>; + union { + struct { + uint32_t min_os_version; + uint16_t platform; + uint16_t gpu; + }; + uint64_t value = 0; + }; + + ProfileKey() = default; + ProfileKey(MinOsVersion p_min_os_version, Platform p_platform, GPU p_gpu) : + min_os_version(p_min_os_version), platform((uint16_t)p_platform), gpu((uint16_t)p_gpu) {} + + _FORCE_INLINE_ uint32_t hash() const { + return hash_one_uint64(value); + } + + bool operator==(const ProfileKey &p_other) const { + return value == p_other.value; + } + }; + + static HashMap<ProfileKey, MetalDeviceProfile> profiles; +}; diff --git a/drivers/metal/metal_device_properties.h b/drivers/metal/metal_device_properties.h index 85959fd45e1d..447e485de6b8 100644 --- a/drivers/metal/metal_device_properties.h +++ b/drivers/metal/metal_device_properties.h @@ -70,15 +70,20 @@ typedef NS_OPTIONS(NSUInteger, SampleCount) { }; struct API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MetalFeatures { - uint32_t mslVersionMajor = 0; - uint32_t mslVersionMinor = 0; + /// Maximum version of the Metal Shading Language available. + uint32_t msl_max_version = 0; + /*! @brief Target version of the Metal Shading Language used to translate shaders. + * + * This can be used to override the features used to generate shaders. Primarily + * for engine developers, for testing. + */ + uint32_t msl_target_version = 0; MTLGPUFamily highestFamily = MTLGPUFamilyApple4; bool supportsBCTextureCompression = false; bool supportsDepth24Stencil8 = false; bool supports32BitFloatFiltering = false; bool supports32BitMSAA = false; bool supportsMac = TARGET_OS_OSX; - MTLLanguageVersion mslVersionEnum = MTLLanguageVersion1_2; SampleCount supportedSampleCounts = SampleCount1; long hostMemoryPageSize = 0; bool layeredRendering = false; @@ -89,14 +94,29 @@ struct API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MetalFeatures { bool tessellationShader = false; /**< If true, tessellation shaders are supported. */ bool imageCubeArray = false; /**< If true, image cube arrays are supported. */ MTLArgumentBuffersTier argument_buffers_tier = MTLArgumentBuffersTier1; - /// If true, argument encoders are required to encode arguments into an argument buffer. - bool needs_arg_encoders = true; + bool needs_arg_encoders = true; /**< If true, argument encoders are required to encode arguments into an argument buffer. */ + bool use_argument_buffers = true; /**< If true, argument buffers can be used instead of slot binding, if available. */ bool metal_fx_spatial = false; /**< If true, Metal FX spatial functions are supported.
*/ bool metal_fx_temporal = false; /**< If true, Metal FX temporal functions are supported. */ bool supports_gpu_address = false; /**< If true, referencing a GPU address in a shader is supported. */ bool supports_image_atomic_32_bit = false; /**< If true, 32-bit atomic operations on images are supported by the GPU. */ bool supports_image_atomic_64_bit = false; /**< If true, 64-bit atomic operations on images are supported by the GPU. */ bool supports_native_image_atomics = false; /**< If true, native image atomic operations are supported by the OS. */ + bool supports_residency_sets = false; /**< If true, residency sets (MTLResidencySet) are supported by the OS. */ + + /*! + * Check if argument buffers are fully supported, which requires tier 2 support and no need for argument encoders. + */ + _FORCE_INLINE_ bool argument_buffers_supported() const { + return argument_buffers_tier == MTLArgumentBuffersTier2 && needs_arg_encoders == false; + } + + /*! + * Check if argument buffers can be used, which requires that they are supported and that the user has enabled their use. + */ + _FORCE_INLINE_ bool argument_buffers_enabled() const { + return use_argument_buffers && argument_buffers_supported(); + } }; struct MetalLimits { diff --git a/drivers/metal/metal_device_properties.mm b/drivers/metal/metal_device_properties.mm index 0cfe98e49ce0..2abec4b40132 100644 --- a/drivers/metal/metal_device_properties.mm +++ b/drivers/metal/metal_device_properties.mm @@ -50,7 +50,9 @@ #import "metal_device_properties.h" -#include "servers/rendering/renderer_rd/effects/metal_fx.h" +#import "metal_utils.h" + +#import "servers/rendering/renderer_rd/effects/metal_fx.h" #import #import @@ -78,6 +80,28 @@ void MetalDeviceProperties::init_features(id p_device) { features = {}; + MTLCompileOptions *opts = [MTLCompileOptions new]; + features.msl_max_version = make_msl_version((opts.languageVersion >> 0x10) & 0xff, (opts.languageVersion >> 0x00) & 0xff); + features.msl_target_version = features.msl_max_version; + if (String version = OS::get_singleton()->get_environment("GODOT_MTL_TARGET_VERSION"); !version.is_empty()) { + if (version != "max") { + Vector parts = version.split(".", true, 2); + if (parts.size() == 2) { + uint32_t major = parts[0].to_int(); + uint32_t minor = parts[1].to_int(); + uint32_t msl_version = make_msl_version(major, minor); + if (msl_version < MSL_VERSION_23 || msl_version > MSL_VERSION_40) { + WARN_PRINT(vformat("GODOT_MTL_TARGET_VERSION: invalid MSL version '%d.%d'", major, minor)); + } else { + print_line(vformat("Override: Targeting Metal version %d.%d", major, minor)); + features.msl_target_version = msl_version; + } + } else { + WARN_PRINT("GODOT_MTL_TARGET_VERSION: invalid version string format. 
Expected major.minor or 'max'."); + } + } + } + features.highestFamily = MTLGPUFamilyApple1; for (MTLGPUFamily family = MTLGPUFamilyApple9; family >= MTLGPUFamilyApple1; --family) { if ([p_device supportsFamily:family]) { @@ -123,17 +147,32 @@ features.argument_buffers_tier = p_device.argumentBuffersSupport; features.supports_image_atomic_32_bit = [p_device supportsFamily:MTLGPUFamilyApple6]; features.supports_image_atomic_64_bit = [p_device supportsFamily:MTLGPUFamilyApple9] || ([p_device supportsFamily:MTLGPUFamilyApple8] && [p_device supportsFamily:MTLGPUFamilyMac2]); - if (@available(macOS 14.0, iOS 17.0, tvOS 17.0, visionOS 1.0, *)) { - features.supports_native_image_atomics = true; + + if (features.msl_target_version >= MSL_VERSION_31) { + // Native atomics are only supported on 3.1 and above. + if (@available(macOS 14.0, iOS 17.0, tvOS 17.0, visionOS 1.0, *)) { + features.supports_native_image_atomics = true; + } } + if (OS::get_singleton()->get_environment("GODOT_MTL_DISABLE_IMAGE_ATOMICS") == "1") { features.supports_native_image_atomics = false; } + if (@available(macOS 15.0, iOS 18.0, tvOS 18.0, visionOS 2.0, *)) { + features.supports_residency_sets = true; + } else { + features.supports_residency_sets = false; + } + if (@available(macOS 13.0, iOS 16.0, tvOS 16.0, *)) { features.needs_arg_encoders = !([p_device supportsFamily:MTLGPUFamilyMetal3] && features.argument_buffers_tier == MTLArgumentBuffersTier2); } + if (String v = OS::get_singleton()->get_environment("GODOT_MTL_DISABLE_ARGUMENT_BUFFERS"); v == "1") { + features.use_argument_buffers = false; + } + if (@available(macOS 13.0, iOS 16.0, tvOS 16.0, *)) { features.metal_fx_spatial = [MTLFXSpatialScalerDescriptor supportsDevice:p_device]; #ifdef METAL_MFXTEMPORAL_ENABLED @@ -142,11 +181,6 @@ features.metal_fx_temporal = false; #endif } - - MTLCompileOptions *opts = [MTLCompileOptions new]; - features.mslVersionEnum = opts.languageVersion; // By default, Metal uses the most recent language version. - features.mslVersionMajor = (opts.languageVersion >> 0x10) & 0xff; - features.mslVersionMinor = (opts.languageVersion >> 0x00) & 0xff; } void MetalDeviceProperties::init_limits(id p_device) { diff --git a/drivers/metal/metal_objects.h b/drivers/metal/metal_objects.h index 4520b79b8b3f..df1e6da31090 100644 --- a/drivers/metal/metal_objects.h +++ b/drivers/metal/metal_objects.h @@ -51,6 +51,7 @@ /**************************************************************************/ #import "metal_device_properties.h" +#import "metal_objects_shared.h" #import "metal_utils.h" #import "pixel_formats.h" #import "sha256_digest.h" @@ -66,38 +67,8 @@ #import #import -// These types can be used in Vector and other containers that use -// pointer operations not supported by ARC. 
-namespace MTL { -#define MTL_CLASS(name) \ - class name { \ - public: \ - name(id obj = nil) : m_obj(obj) {} \ - operator id() const { \ - return m_obj; \ - } \ - id m_obj; \ - }; - -MTL_CLASS(Texture) - -} //namespace MTL - -enum ShaderStageUsage : uint32_t { - None = 0, - Vertex = RDD::SHADER_STAGE_VERTEX_BIT, - Fragment = RDD::SHADER_STAGE_FRAGMENT_BIT, - TesselationControl = RDD::SHADER_STAGE_TESSELATION_CONTROL_BIT, - TesselationEvaluation = RDD::SHADER_STAGE_TESSELATION_EVALUATION_BIT, - Compute = RDD::SHADER_STAGE_COMPUTE_BIT, -}; - -_FORCE_INLINE_ ShaderStageUsage &operator|=(ShaderStageUsage &p_a, int p_b) { - p_a = ShaderStageUsage(uint32_t(p_a) | uint32_t(p_b)); - return p_a; -} - enum StageResourceUsage : uint32_t { + ResourceUnused = 0, VertexRead = (MTLResourceUsageRead << RDD::SHADER_STAGE_VERTEX * 2), VertexWrite = (MTLResourceUsageWrite << RDD::SHADER_STAGE_VERTEX * 2), FragmentRead = (MTLResourceUsageRead << RDD::SHADER_STAGE_FRAGMENT * 2), @@ -110,9 +81,61 @@ enum StageResourceUsage : uint32_t { ComputeWrite = (MTLResourceUsageWrite << RDD::SHADER_STAGE_COMPUTE * 2), }; -typedef LocalVector<__unsafe_unretained id> ResourceVector; +typedef id<MTLResource> __unsafe_unretained MTLResourceUnsafe; + +template <> +struct HashMapHasherDefaultImpl<MTLResourceUnsafe> { + static _FORCE_INLINE_ uint32_t hash(const MTLResourceUnsafe p_pointer) { return hash_one_uint64((uint64_t)p_pointer); } +}; + +typedef LocalVector<MTLResourceUnsafe> ResourceVector; typedef HashMap<StageResourceUsage, ResourceVector> ResourceUsageMap; +struct ResourceUsageEntry { + StageResourceUsage usage = ResourceUnused; + uint32_t unused = 0; + + ResourceUsageEntry() {} + ResourceUsageEntry(StageResourceUsage p_usage) : + usage(p_usage) {} +}; + +template <> +struct is_zero_constructible<ResourceUsageEntry> : std::true_type {}; + +/*! Track the cumulative usage for a resource during a render or compute pass */ +typedef HashMap<MTLResourceUnsafe, ResourceUsageEntry> ResourceToStageUsage; + +/*! Track resources and ensure they are resident prior to dispatch or draw commands. + * + * The primary purpose of this data structure is to track all the resources that must be made resident prior + * to issuing the next dispatch or draw command. It aggregates all resources used from argument buffers. + * + * As an optimization, this data structure also tracks previous usage for resources, so that + * it may avoid binding them again in later commands if the resource is already resident and its usage flagged. + */ +struct API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) ResourceTracker { + // A constant specifying how many iterations a resource can remain in + // the _previous HashMap before it will be removed permanently. + // + // Keeping them in the _previous HashMap reduces churn if resources are regularly + // bound. 256 is arbitrary, but if an object remains unused for 256 encoders, + // it will be released. + static constexpr uint32_t RESOURCE_UNUSED_CLEANUP_COUNT = 256; + + // Used as a scratch buffer to periodically clean up resources from _previous. + ResourceVector _scratch; + // Tracks all resources and their prior usage for the duration of the encoder.
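+ // A resource's entry persists across commands; its `unused` counter ages idle
+ // entries out after RESOURCE_UNUSED_CLEANUP_COUNT encoders, with _scratch as
+ // the temporary list used during that sweep.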
+ ResourceToStageUsage _previous; + // Tracks resources for the current command that must be made resident + ResourceUsageMap _current; + + void merge_from(const ResourceUsageMap &p_from); + void encode(id __unsafe_unretained p_enc); + void encode(id __unsafe_unretained p_enc); + void reset(); +}; + enum class MDCommandBufferStateType { None, Render, @@ -130,53 +153,15 @@ class MDRenderPass; class MDPipeline; class MDRenderPipeline; class MDComputePipeline; -class MDFrameBuffer; class RenderingDeviceDriverMetal; class MDUniformSet; class MDShader; struct MetalBufferDynamicInfo; -#pragma mark - Resource Factory - -struct ClearAttKey { - const static uint32_t COLOR_COUNT = MAX_COLOR_ATTACHMENT_COUNT; - const static uint32_t DEPTH_INDEX = COLOR_COUNT; - const static uint32_t STENCIL_INDEX = DEPTH_INDEX + 1; - const static uint32_t ATTACHMENT_COUNT = STENCIL_INDEX + 1; +using RDM = RenderingDeviceDriverMetal; - enum Flags : uint16_t { - CLEAR_FLAGS_NONE = 0, - CLEAR_FLAGS_LAYERED = 1 << 0, - }; - - Flags flags = CLEAR_FLAGS_NONE; - uint16_t sample_count = 0; - uint16_t pixel_formats[ATTACHMENT_COUNT] = { 0 }; - - _FORCE_INLINE_ void set_color_format(uint32_t p_idx, MTLPixelFormat p_fmt) { pixel_formats[p_idx] = p_fmt; } - _FORCE_INLINE_ void set_depth_format(MTLPixelFormat p_fmt) { pixel_formats[DEPTH_INDEX] = p_fmt; } - _FORCE_INLINE_ void set_stencil_format(MTLPixelFormat p_fmt) { pixel_formats[STENCIL_INDEX] = p_fmt; } - _FORCE_INLINE_ MTLPixelFormat depth_format() const { return (MTLPixelFormat)pixel_formats[DEPTH_INDEX]; } - _FORCE_INLINE_ MTLPixelFormat stencil_format() const { return (MTLPixelFormat)pixel_formats[STENCIL_INDEX]; } - _FORCE_INLINE_ void enable_layered_rendering() { flags::set(flags, CLEAR_FLAGS_LAYERED); } - - _FORCE_INLINE_ bool is_enabled(uint32_t p_idx) const { return pixel_formats[p_idx] != 0; } - _FORCE_INLINE_ bool is_depth_enabled() const { return pixel_formats[DEPTH_INDEX] != 0; } - _FORCE_INLINE_ bool is_stencil_enabled() const { return pixel_formats[STENCIL_INDEX] != 0; } - _FORCE_INLINE_ bool is_layered_rendering_enabled() const { return flags::any(flags, CLEAR_FLAGS_LAYERED); } - - _FORCE_INLINE_ bool operator==(const ClearAttKey &p_rhs) const { - return memcmp(this, &p_rhs, sizeof(ClearAttKey)) == 0; - } - - uint32_t hash() const { - uint32_t h = hash_murmur3_one_32(flags); - h = hash_murmur3_one_32(sample_count, h); - h = hash_murmur3_buffer(pixel_formats, ATTACHMENT_COUNT * sizeof(pixel_formats[0]), h); - return hash_fmix32(h); - } -}; +#pragma mark - Resource Factory class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDResourceFactory { private: @@ -309,13 +294,127 @@ class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDRenderPass { MDRenderPass(Vector &p_attachments, Vector &p_subpasses); }; +struct BindingCache { + struct BufferBinding { + id __unsafe_unretained buffer = nil; + NSUInteger offset = 0; + + bool operator!=(const BufferBinding &p_other) const { + return buffer != p_other.buffer || offset != p_other.offset; + } + }; + + LocalVector __unsafe_unretained> textures; + LocalVector __unsafe_unretained> samplers; + LocalVector buffers; + + _FORCE_INLINE_ void clear() { + textures.clear(); + samplers.clear(); + buffers.clear(); + } + +private: + template + _FORCE_INLINE_ void ensure_size(LocalVector &p_vec, uint32_t p_required) { + if (p_vec.size() < p_required) { + p_vec.resize_initialized(p_required); + } + } + +public: + _FORCE_INLINE_ bool update(NSRange p_range, id __unsafe_unretained const *p_values) { + if (p_range.length == 0) { + 
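// An empty range cannot change any cached binding.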
return false; + } + uint32_t required = (uint32_t)(p_range.location + p_range.length); + ensure_size(textures, required); + bool changed = false; + for (NSUInteger i = 0; i < p_range.length; ++i) { + uint32_t slot = (uint32_t)(p_range.location + i); + id value = p_values[i]; + if (textures[slot] != value) { + textures[slot] = value; + changed = true; + } + } + return changed; + } + + _FORCE_INLINE_ bool update(NSRange p_range, id __unsafe_unretained const *p_values) { + if (p_range.length == 0) { + return false; + } + uint32_t required = (uint32_t)(p_range.location + p_range.length); + ensure_size(samplers, required); + bool changed = false; + for (NSUInteger i = 0; i < p_range.length; ++i) { + uint32_t slot = (uint32_t)(p_range.location + i); + id __unsafe_unretained value = p_values[i]; + if (samplers[slot] != value) { + samplers[slot] = value; + changed = true; + } + } + return changed; + } + + _FORCE_INLINE_ bool update(NSRange p_range, id __unsafe_unretained const *p_values, const NSUInteger *p_offsets) { + if (p_range.length == 0) { + return false; + } + uint32_t required = (uint32_t)(p_range.location + p_range.length); + ensure_size(buffers, required); + BufferBinding *buffers_ptr = buffers.ptr() + p_range.location; + bool changed = false; + for (NSUInteger i = 0; i < p_range.length; ++i) { + BufferBinding &binding = *buffers_ptr; + BufferBinding new_binding = { + .buffer = p_values[i], + .offset = p_offsets[i], + }; + if (binding != new_binding) { + binding = new_binding; + changed = true; + } + ++buffers_ptr; + } + return changed; + } + + _FORCE_INLINE_ bool update(id __unsafe_unretained p_buffer, NSUInteger p_offset, uint32_t p_index) { + uint32_t required = p_index + 1; + ensure_size(buffers, required); + BufferBinding &binding = buffers.ptr()[p_index]; + BufferBinding new_binding = { + .buffer = p_buffer, + .offset = p_offset, + }; + if (binding != new_binding) { + binding = new_binding; + return true; + } + return false; + } +}; + class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDCommandBuffer { + friend class MDUniformSet; + private: #pragma mark - Common State // From RenderingDevice static constexpr uint32_t MAX_PUSH_CONSTANT_SIZE = 128; + uint8_t push_constant_data[MAX_PUSH_CONSTANT_SIZE]; + uint32_t push_constant_data_len = 0; + uint32_t push_constant_binding = UINT32_MAX; + + BindingCache binding_cache; + + void reset(); + RenderingDeviceDriverMetal *device_driver = nullptr; id queue = nil; id commandBuffer = nil; @@ -331,6 +430,16 @@ class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDCommandBuffer { void _end_compute_dispatch(); void _end_blit(); + id _ensure_blit_encoder(); + + enum class CopySource { + Buffer, + Texture, + }; + void _copy_texture_buffer(CopySource p_source, + RDD::TextureID p_texture, + RDD::BufferID p_buffer, + VectorView p_regions); #pragma mark - Render @@ -368,7 +477,7 @@ class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDCommandBuffer { uint32_t index_offset = 0; LocalVector __unsafe_unretained> vertex_buffers; LocalVector vertex_offsets; - ResourceUsageMap resource_usage; + ResourceTracker resource_tracker; // clang-format off enum DirtyFlag: uint16_t { DIRTY_NONE = 0, @@ -390,9 +499,6 @@ class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDCommandBuffer { uint32_t dynamic_offsets = 0; // Bit mask of the uniform sets that are dirty, to prevent redundant binding. 
uint64_t uniform_set_mask = 0; - uint8_t push_constant_data[MAX_PUSH_CONSTANT_SIZE]; - uint32_t push_constant_data_len = 0; - uint32_t push_constant_bindings[2] = { ~0U, ~0U }; _FORCE_INLINE_ void reset(); void end_encoding(); @@ -447,13 +553,6 @@ class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDCommandBuffer { dirty.set_flag(DirtyFlag::DIRTY_UNIFORMS); } - _FORCE_INLINE_ void mark_push_constants_dirty() { - if (push_constant_data_len == 0) { - return; - } - dirty.set_flag(DirtyFlag::DIRTY_PUSH); - } - _FORCE_INLINE_ void mark_blend_dirty() { if (!blend_constants.has_value()) { return; @@ -495,7 +594,7 @@ class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDCommandBuffer { struct ComputeState { MDComputePipeline *pipeline = nullptr; id encoder = nil; - ResourceUsageMap resource_usage; + ResourceTracker resource_tracker; // clang-format off enum DirtyFlag: uint16_t { DIRTY_NONE = 0, @@ -511,9 +610,6 @@ class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDCommandBuffer { uint32_t dynamic_offsets = 0; // Bit mask of the uniform sets that are dirty, to prevent redundant binding. uint64_t uniform_set_mask = 0; - uint8_t push_constant_data[MAX_PUSH_CONSTANT_SIZE]; - uint32_t push_constant_data_len = 0; - uint32_t push_constant_bindings[1] = { ~0U }; _FORCE_INLINE_ void reset(); void end_encoding(); @@ -529,14 +625,6 @@ class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDCommandBuffer { } dirty.set_flag(DirtyFlag::DIRTY_UNIFORMS); } - - _FORCE_INLINE_ void mark_push_constants_dirty() { - if (push_constant_data_len == 0) { - return; - } - dirty.set_flag(DirtyFlag::DIRTY_PUSH); - } - } compute; // State specific to a blit pass. @@ -555,9 +643,6 @@ class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDCommandBuffer { void commit(); void end(); - id blit_command_encoder(); - void encodeRenderCommandEncoderWithDescriptor(MTLRenderPassDescriptor *p_desc, NSString *p_label); - void bind_pipeline(RDD::PipelineID p_pipeline); void encode_push_constant_data(RDD::ShaderID p_shader, VectorView p_data); @@ -600,6 +685,25 @@ class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDCommandBuffer { void compute_dispatch(uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups); void compute_dispatch_indirect(RDD::BufferID p_indirect_buffer, uint64_t p_offset); +#pragma mark - Transfer + +private: + void encodeRenderCommandEncoderWithDescriptor(MTLRenderPassDescriptor *p_desc, NSString *p_label); + +public: + void resolve_texture(RDD::TextureID p_src_texture, RDD::TextureLayout p_src_texture_layout, uint32_t p_src_layer, uint32_t p_src_mipmap, RDD::TextureID p_dst_texture, RDD::TextureLayout p_dst_texture_layout, uint32_t p_dst_layer, uint32_t p_dst_mipmap); + void clear_color_texture(RDD::TextureID p_texture, RDD::TextureLayout p_texture_layout, const Color &p_color, const RDD::TextureSubresourceRange &p_subresources); + void clear_buffer(RDD::BufferID p_buffer, uint64_t p_offset, uint64_t p_size); + void copy_buffer(RDD::BufferID p_src_buffer, RDD::BufferID p_dst_buffer, VectorView p_regions); + void copy_texture(RDD::TextureID p_src_texture, RDD::TextureID p_dst_texture, VectorView p_regions); + void copy_buffer_to_texture(RDD::BufferID p_src_buffer, RDD::TextureID p_dst_texture, VectorView p_regions); + void copy_texture_to_buffer(RDD::TextureID p_src_texture, RDD::BufferID p_dst_buffer, VectorView p_regions); + +#pragma mark - Debugging + + void begin_label(const char *p_label_name, const Color &p_color); + void end_label(); + MDCommandBuffer(id p_queue, 
RenderingDeviceDriverMetal *p_device_driver) : device_driver(p_device_driver), queue(p_queue) { type = MDCommandBufferStateType::None; @@ -615,44 +719,44 @@ class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDCommandBuffer { #define MTLBindingAccessWriteOnly MTLArgumentAccessWriteOnly #endif -struct API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) BindingInfo { +struct API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) UniformInfo { + uint32_t binding; + BitField active_stages; MTLDataType dataType = MTLDataTypeNone; - uint32_t index = 0; MTLBindingAccess access = MTLBindingAccessReadOnly; MTLResourceUsage usage = 0; MTLTextureType textureType = MTLTextureType2D; - int imageFormat = 0; + uint32_t imageFormat = 0; uint32_t arrayLength = 0; - bool isMultisampled = false; - - inline MTLArgumentDescriptor *new_argument_descriptor() const { - MTLArgumentDescriptor *desc = MTLArgumentDescriptor.argumentDescriptor; - desc.dataType = dataType; - desc.index = index; - desc.access = access; - desc.textureType = textureType; - desc.arrayLength = arrayLength; - return desc; - } -}; + bool isMultisampled = 0; -using RDC = RenderingDeviceCommons; + struct Indexes { + uint32_t buffer = UINT32_MAX; + uint32_t texture = UINT32_MAX; + uint32_t sampler = UINT32_MAX; + }; + Indexes slot; + Indexes arg_buffer; -typedef API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) HashMap BindingInfoMap; + enum class IndexType { + SLOT, + ARG, + }; -struct API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) UniformInfo { - uint32_t binding; - ShaderStageUsage active_stages = None; - BindingInfoMap bindings; - BindingInfoMap bindings_secondary; + _FORCE_INLINE_ Indexes &get_indexes(IndexType p_type) { + switch (p_type) { + case IndexType::SLOT: + return slot; + case IndexType::ARG: + return arg_buffer; + } + } }; struct API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) UniformSet { LocalVector uniforms; LocalVector dynamic_uniforms; uint32_t buffer_size = 0; - HashMap offsets; - HashMap> encoders; }; struct ShaderCacheEntry; @@ -691,13 +795,6 @@ enum class ShaderLoadStrategy { data:(dispatch_data_t)data; @end -template <> -struct HashMapComparatorDefault { - static bool compare(const SHA256Digest &p_lhs, const SHA256Digest &p_rhs) { - return memcmp(p_lhs.data, p_rhs.data, CC_SHA256_DIGEST_LENGTH) == 0; - } -}; - /// A cache entry for a Metal shader library. struct ShaderCacheEntry { RenderingDeviceDriverMetal &owner; @@ -718,16 +815,6 @@ struct ShaderCacheEntry { ~ShaderCacheEntry() = default; }; -/// Godot limits the number of dynamic buffers to 8. -/// -/// This is a minimum guarantee for Vulkan. -constexpr uint32_t MAX_DYNAMIC_BUFFERS = 8; - -/// Maximum number of queued frames. 
-/// -/// See setting: rendering/rendering_device/vsync/frame_queue_size -constexpr uint32_t MAX_FRAME_COUNT = 4; - class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) DynamicOffsetLayout { struct Data { uint8_t offset : 4; @@ -760,19 +847,15 @@ class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) DynamicOffsetLayout { } }; -class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) DynamicOffsets { - uint32_t data; - -public: - _FORCE_INLINE_ uint32_t get_frame_index(const DynamicOffsetLayout &p_layout) const { - return data; - } -}; - class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDShader { public: CharString name; Vector sets; + struct { + BitField stages = {}; + uint32_t binding = UINT32_MAX; + uint32_t size = 0; + } push_constants; DynamicOffsetLayout dynamic_offset_layout; bool uses_argument_buffers = true; @@ -783,10 +866,6 @@ class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDShader { class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDComputeShader final : public MDShader { public: - struct { - int32_t binding = -1; - uint32_t size = 0; - } push_constants; MTLSize local = {}; MDLibrary *kernel; @@ -796,16 +875,6 @@ class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDComputeShader final : class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDRenderShader final : public MDShader { public: - struct { - struct { - int32_t binding = -1; - uint32_t size = 0; - } vert; - struct { - int32_t binding = -1; - uint32_t size = 0; - } frag; - } push_constants; bool needs_view_mask_buffer = false; MDLibrary *vert; @@ -838,52 +907,69 @@ struct HashMapComparatorDefault { } }; -struct BoundUniformSet { - id buffer; - ResourceUsageMap usage_to_resources; - /// Size of the per-frame buffer, which is 0 when there are no dynamic uniforms. - uint32_t frame_size = 0; - - /// Perform a 2-way merge each key of `ResourceVector` resources from this set into the - /// destination set. - /// - /// Assumes the vectors of resources are sorted. - void merge_into(ResourceUsageMap &p_dst) const; +template <> +struct HashMapComparatorDefault { + static bool compare(const RDD::BufferID &p_lhs, const RDD::BufferID &p_rhs) { + return p_lhs.id == p_rhs.id; + } +}; - /// Returns true if this bound uniform set contains dynamic uniforms. - _FORCE_INLINE_ bool is_dynamic() const { return frame_size > 0; } +template <> +struct HashMapComparatorDefault { + static bool compare(const RDD::TextureID &p_lhs, const RDD::TextureID &p_rhs) { + return p_lhs.id == p_rhs.id; + } +}; - /// Calculate the offset in the Metal buffer for the current frame. - _FORCE_INLINE_ uint32_t frame_offset(uint32_t p_frame_index) const { return p_frame_index * frame_size; } +template <> +struct HashMapHasherDefaultImpl { + static _FORCE_INLINE_ uint32_t hash(const RDD::BufferID &p_value) { + return HashMapHasherDefaultImpl::hash(p_value.id); + } +}; - /// Calculate the offset in the buffer for the given frame index and base offset. 
- _FORCE_INLINE_ uint32_t make_offset(uint32_t p_frame_index, uint32_t p_base_offset) const { - return frame_offset(p_frame_index) + p_base_offset; +template <> +struct HashMapHasherDefaultImpl { + static _FORCE_INLINE_ uint32_t hash(const RDD::TextureID &p_value) { + return HashMapHasherDefaultImpl::hash(p_value.id); } +}; - BoundUniformSet() = default; - BoundUniformSet(id p_buffer, ResourceUsageMap &&p_usage_to_resources, uint32_t p_frame_size) : - buffer(p_buffer), usage_to_resources(std::move(p_usage_to_resources)), frame_size(p_frame_size) {} +// A type used to encode resources directly to a MTLCommandEncoder +struct DirectEncoder { + id __unsafe_unretained encoder; + BindingCache &cache; + enum Mode { + RENDER, + COMPUTE + }; + Mode mode; + + void set(id __unsafe_unretained *p_buffers, const NSUInteger *p_offsets, NSRange p_range); + void set(id __unsafe_unretained p_buffer, const NSUInteger p_offset, uint32_t p_index); + void set(id __unsafe_unretained *p_textures, NSRange p_range); + void set(id __unsafe_unretained *p_samplers, NSRange p_range); + + DirectEncoder(id __unsafe_unretained p_encoder, BindingCache &p_cache) : + encoder(p_encoder), cache(p_cache) { + if ([p_encoder conformsToProtocol:@protocol(MTLRenderCommandEncoder)]) { + mode = RENDER; + } else { + mode = COMPUTE; + } + } }; class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDUniformSet { -private: - void bind_uniforms_argument_buffers(MDShader *p_shader, MDCommandBuffer::RenderState &p_state, uint32_t p_set_index, uint32_t p_dynamic_offsets, uint32_t p_frame_idx, uint32_t p_frame_count); - void bind_uniforms_direct(MDShader *p_shader, MDCommandBuffer::RenderState &p_state, uint32_t p_set_index, uint32_t p_dynamic_offsets); - void bind_uniforms_argument_buffers(MDShader *p_shader, MDCommandBuffer::ComputeState &p_state, uint32_t p_set_index, uint32_t p_dynamic_offsets, uint32_t p_frame_idx, uint32_t p_frame_count); - void bind_uniforms_direct(MDShader *p_shader, MDCommandBuffer::ComputeState &p_state, uint32_t p_set_index, uint32_t p_dynamic_offsets); - - void update_dynamic_uniforms(MDShader *p_shader, ResourceUsageMap &p_resource_usage, uint32_t p_set_index, BoundUniformSet &p_bound_set, uint32_t p_dynamic_offsets, uint32_t p_frame_idx); - public: uint32_t index = 0; + id arg_buffer = nil; + ResourceUsageMap usage_to_resources; LocalVector uniforms; - HashMap bound_uniforms; - - void bind_uniforms(MDShader *p_shader, MDCommandBuffer::RenderState &p_state, uint32_t p_set_index, uint32_t p_dynamic_offsets, uint32_t p_frame_idx, uint32_t p_frame_count); - void bind_uniforms(MDShader *p_shader, MDCommandBuffer::ComputeState &p_state, uint32_t p_set_index, uint32_t p_dynamic_offsets, uint32_t p_frame_idx, uint32_t p_frame_count); - BoundUniformSet &bound_uniform_set(MDShader *p_shader, id p_device, ResourceUsageMap &p_resource_usage, uint32_t p_set_index, uint32_t p_dynamic_offsets, uint32_t p_frame_idx, uint32_t p_frame_count); + void bind_uniforms_argument_buffers(MDShader *p_shader, MDCommandBuffer::RenderState &p_state, uint32_t p_set_index, uint32_t p_dynamic_offsets, uint32_t p_frame_idx, uint32_t p_frame_count); + void bind_uniforms_argument_buffers(MDShader *p_shader, MDCommandBuffer::ComputeState &p_state, uint32_t p_set_index, uint32_t p_dynamic_offsets, uint32_t p_frame_idx, uint32_t p_frame_count); + void bind_uniforms_direct(MDShader *p_shader, DirectEncoder p_enc, uint32_t p_set_index, uint32_t p_dynamic_offsets); }; class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDPipeline { @@ -986,72 
+1072,13 @@ class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDComputePipeline final ~MDComputePipeline() final = default; }; -class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDFrameBuffer { - Vector textures; - -public: - Size2i size; - MDFrameBuffer(Vector p_textures, Size2i p_size) : - textures(p_textures), size(p_size) {} - MDFrameBuffer() {} - - /// Returns the texture at the given index. - _ALWAYS_INLINE_ MTL::Texture get_texture(uint32_t p_idx) const { - return textures[p_idx]; - } - - /// Returns true if the texture at the given index is not nil. - _ALWAYS_INLINE_ bool has_texture(uint32_t p_idx) const { - return textures[p_idx] != nil; - } - - /// Set the texture at the given index. - _ALWAYS_INLINE_ void set_texture(uint32_t p_idx, MTL::Texture p_texture) { - textures.write[p_idx] = p_texture; - } - - /// Unset or nil the texture at the given index. - _ALWAYS_INLINE_ void unset_texture(uint32_t p_idx) { - textures.write[p_idx] = nil; - } - - /// Resizes buffers to the specified size. - _ALWAYS_INLINE_ void set_texture_count(uint32_t p_size) { - textures.resize(p_size); - } - - virtual ~MDFrameBuffer() = default; -}; - -// These functions are used to convert between Objective-C objects and -// the RIDs used by Godot, respecting automatic reference counting. namespace rid { - -// Converts an Objective-C object to a pointer, and incrementing the -// reference count. -_FORCE_INLINE_ void *owned(id p_id) { - return (__bridge_retained void *)p_id; -} - #define MAKE_ID(FROM, TO) \ _FORCE_INLINE_ TO make(FROM p_obj) { \ return TO(owned(p_obj)); \ } -MAKE_ID(id, RDD::TextureID) -MAKE_ID(id, RDD::BufferID) -MAKE_ID(id, RDD::SamplerID) -MAKE_ID(MTLVertexDescriptor *, RDD::VertexFormatID) MAKE_ID(id, RDD::CommandPoolID) -// Converts a pointer to an Objective-C object without changing the reference count. -_FORCE_INLINE_ auto get(RDD::ID p_id) { - return (p_id.id) ? (__bridge ::id)(void *)p_id.id : nil; -} - -// Converts a pointer to an Objective-C object, and decrements the reference count. -_FORCE_INLINE_ auto release(RDD::ID p_id) { - return (__bridge_transfer ::id)(void *)p_id.id; -} - -} // namespace rid +#undef MAKE_ID +} //namespace rid diff --git a/drivers/metal/metal_objects.mm b/drivers/metal/metal_objects.mm index 8d439ef342a0..4a85081b2bc0 100644 --- a/drivers/metal/metal_objects.mm +++ b/drivers/metal/metal_objects.mm @@ -62,9 +62,19 @@ #undef MIN #undef MAX +void MDCommandBuffer::begin_label(const char *p_label_name, const Color &p_color) { + NSString *s = [[NSString alloc] initWithBytesNoCopy:(void *)p_label_name length:strlen(p_label_name) encoding:NSUTF8StringEncoding freeWhenDone:NO]; + [commandBuffer pushDebugGroup:s]; +} + +void MDCommandBuffer::end_label() { + [commandBuffer popDebugGroup]; +} + void MDCommandBuffer::begin() { DEV_ASSERT(commandBuffer == nil && !state_begin); state_begin = true; + binding_cache.clear(); } void MDCommandBuffer::end() { @@ -149,6 +159,7 @@ // capturing a Metal frame in Xcode. // // If we don't mark as dirty, then some bindings will generate a validation error. 
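+ // The binding cache is per-encoder state: a new encoder starts with nothing
+ // bound, so stale cache hits would otherwise suppress required rebinds.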
+ binding_cache.clear(); render.mark_uniforms_dirty(); if (render.pipeline != nullptr && render.pipeline->depth_stencil != rp->depth_stencil) { render.dirty.set_flag(RenderState::DIRTY_DEPTH); @@ -164,6 +175,7 @@ if (compute.pipeline != p) { compute.dirty.set_flag(ComputeState::DIRTY_PIPELINE); + binding_cache.clear(); compute.mark_uniforms_dirty(); compute.pipeline = (MDComputePipeline *)p; } @@ -172,30 +184,29 @@ void MDCommandBuffer::encode_push_constant_data(RDD::ShaderID p_shader, VectorView p_data) { switch (type) { - case MDCommandBufferStateType::Render: { - MDRenderShader *shader = (MDRenderShader *)(p_shader.id); - if (shader->push_constants.vert.binding == -1 && shader->push_constants.frag.binding == -1) { - return; - } - render.push_constant_bindings[0] = shader->push_constants.vert.binding; - render.push_constant_bindings[1] = shader->push_constants.frag.binding; - void const *ptr = p_data.ptr(); - render.push_constant_data_len = p_data.size() * sizeof(uint32_t); - DEV_ASSERT(render.push_constant_data_len <= sizeof(RenderState::push_constant_data)); - memcpy(render.push_constant_data, ptr, render.push_constant_data_len); - render.mark_push_constants_dirty(); - } break; + case MDCommandBufferStateType::Render: case MDCommandBufferStateType::Compute: { - MDComputeShader *shader = (MDComputeShader *)(p_shader.id); - if (shader->push_constants.binding == -1) { + MDShader *shader = (MDShader *)(p_shader.id); + if (shader->push_constants.binding == UINT32_MAX) { return; } - compute.push_constant_bindings[0] = shader->push_constants.binding; + push_constant_binding = shader->push_constants.binding; void const *ptr = p_data.ptr(); - compute.push_constant_data_len = p_data.size() * sizeof(uint32_t); - DEV_ASSERT(compute.push_constant_data_len <= sizeof(ComputeState::push_constant_data)); - memcpy(compute.push_constant_data, ptr, compute.push_constant_data_len); - compute.mark_push_constants_dirty(); + push_constant_data_len = p_data.size() * sizeof(uint32_t); + DEV_ASSERT(push_constant_data_len <= sizeof(push_constant_data)); + memcpy(push_constant_data, ptr, push_constant_data_len); + if (push_constant_data_len > 0) { + switch (type) { + case MDCommandBufferStateType::Render: + render.dirty.set_flag(RenderState::DirtyFlag::DIRTY_PUSH); + break; + case MDCommandBufferStateType::Compute: + compute.dirty.set_flag(ComputeState::DirtyFlag::DIRTY_PUSH); + break; + default: + break; + } + } } break; case MDCommandBufferStateType::Blit: case MDCommandBufferStateType::None: @@ -203,7 +214,7 @@ } } -id MDCommandBuffer::blit_command_encoder() { +id MDCommandBuffer::_ensure_blit_encoder() { switch (type) { case MDCommandBufferStateType::None: break; @@ -222,6 +233,360 @@ return blit.encoder; } +_FORCE_INLINE_ static MTLSize mipmapLevelSizeFromTexture(id p_tex, NSUInteger p_level) { + MTLSize lvlSize; + lvlSize.width = MAX(p_tex.width >> p_level, 1UL); + lvlSize.height = MAX(p_tex.height >> p_level, 1UL); + lvlSize.depth = MAX(p_tex.depth >> p_level, 1UL); + return lvlSize; +} + +void MDCommandBuffer::resolve_texture(RDD::TextureID p_src_texture, RDD::TextureLayout p_src_texture_layout, uint32_t p_src_layer, uint32_t p_src_mipmap, RDD::TextureID p_dst_texture, RDD::TextureLayout p_dst_texture_layout, uint32_t p_dst_layer, uint32_t p_dst_mipmap) { + id src_tex = rid::get(p_src_texture); + id dst_tex = rid::get(p_dst_texture); + + MTLRenderPassDescriptor *mtlRPD = [MTLRenderPassDescriptor renderPassDescriptor]; + MTLRenderPassColorAttachmentDescriptor *mtlColorAttDesc = mtlRPD.colorAttachments[0]; + 
mtlColorAttDesc.loadAction = MTLLoadActionLoad; + mtlColorAttDesc.storeAction = MTLStoreActionMultisampleResolve; + + mtlColorAttDesc.texture = src_tex; + mtlColorAttDesc.resolveTexture = dst_tex; + mtlColorAttDesc.level = p_src_mipmap; + mtlColorAttDesc.slice = p_src_layer; + mtlColorAttDesc.resolveLevel = p_dst_mipmap; + mtlColorAttDesc.resolveSlice = p_dst_layer; + encodeRenderCommandEncoderWithDescriptor(mtlRPD, @"Resolve Image"); +} + +void MDCommandBuffer::clear_color_texture(RDD::TextureID p_texture, RDD::TextureLayout p_texture_layout, const Color &p_color, const RDD::TextureSubresourceRange &p_subresources) { + id src_tex = rid::get(p_texture); + + if (src_tex.parentTexture) { + // Clear via the parent texture rather than the view. + src_tex = src_tex.parentTexture; + } + + PixelFormats &pf = device_driver->get_pixel_formats(); + + if (pf.isDepthFormat(src_tex.pixelFormat) || pf.isStencilFormat(src_tex.pixelFormat)) { + ERR_FAIL_MSG("invalid: depth or stencil texture format"); + } + + MTLRenderPassDescriptor *desc = MTLRenderPassDescriptor.renderPassDescriptor; + + if (p_subresources.aspect.has_flag(RDD::TEXTURE_ASPECT_COLOR_BIT)) { + MTLRenderPassColorAttachmentDescriptor *caDesc = desc.colorAttachments[0]; + caDesc.texture = src_tex; + caDesc.loadAction = MTLLoadActionClear; + caDesc.storeAction = MTLStoreActionStore; + caDesc.clearColor = MTLClearColorMake(p_color.r, p_color.g, p_color.b, p_color.a); + + // Extract the mipmap levels that are to be updated. + uint32_t mipLvlStart = p_subresources.base_mipmap; + uint32_t mipLvlCnt = p_subresources.mipmap_count; + uint32_t mipLvlEnd = mipLvlStart + mipLvlCnt; + + uint32_t levelCount = src_tex.mipmapLevelCount; + + // Extract the cube or array layers (slices) that are to be updated. + bool is3D = src_tex.textureType == MTLTextureType3D; + uint32_t layerStart = is3D ? 0 : p_subresources.base_layer; + uint32_t layerCnt = p_subresources.layer_count; + uint32_t layerEnd = layerStart + layerCnt; + + MetalFeatures const &features = device_driver->get_device_properties().features; + + // Iterate across mipmap levels and layers, and perform an empty render to clear each. + for (uint32_t mipLvl = mipLvlStart; mipLvl < mipLvlEnd; mipLvl++) { + ERR_FAIL_INDEX_MSG(mipLvl, levelCount, "mip level out of range"); + + caDesc.level = mipLvl; + + // If a 3D image, we need to get the depth for each level. + if (is3D) { + layerCnt = mipmapLevelSizeFromTexture(src_tex, mipLvl).depth; + layerEnd = layerStart + layerCnt; + } + + if ((features.layeredRendering && src_tex.sampleCount == 1) || features.multisampleLayeredRendering) { + // We can clear all layers at once.
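+ // Setting renderTargetArrayLength below lets this single pass clear every
+ // slice (or every depth plane, for 3D textures) starting at layerStart.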
+ if (is3D) { + caDesc.depthPlane = layerStart; + } else { + caDesc.slice = layerStart; + } + desc.renderTargetArrayLength = layerCnt; + encodeRenderCommandEncoderWithDescriptor(desc, @"Clear Image"); + } else { + for (uint32_t layer = layerStart; layer < layerEnd; layer++) { + if (is3D) { + caDesc.depthPlane = layer; + } else { + caDesc.slice = layer; + } + encodeRenderCommandEncoderWithDescriptor(desc, @"Clear Image"); + } + } + } + } +} + +void MDCommandBuffer::clear_buffer(RDD::BufferID p_buffer, uint64_t p_offset, uint64_t p_size) { + id blit_enc = _ensure_blit_encoder(); + const RDM::BufferInfo *buffer = (const RDM::BufferInfo *)p_buffer.id; + + [blit_enc fillBuffer:buffer->metal_buffer + range:NSMakeRange(p_offset, p_size) + value:0]; +} + +void MDCommandBuffer::copy_buffer(RDD::BufferID p_src_buffer, RDD::BufferID p_dst_buffer, VectorView p_regions) { + const RDM::BufferInfo *src = (const RDM::BufferInfo *)p_src_buffer.id; + const RDM::BufferInfo *dst = (const RDM::BufferInfo *)p_dst_buffer.id; + + id enc = _ensure_blit_encoder(); + + for (uint32_t i = 0; i < p_regions.size(); i++) { + RDD::BufferCopyRegion region = p_regions[i]; + [enc copyFromBuffer:src->metal_buffer + sourceOffset:region.src_offset + toBuffer:dst->metal_buffer + destinationOffset:region.dst_offset + size:region.size]; + } +} + +static MTLSize MTLSizeFromVector3i(Vector3i p_size) { + return MTLSizeMake(p_size.x, p_size.y, p_size.z); +} + +static MTLOrigin MTLOriginFromVector3i(Vector3i p_origin) { + return MTLOriginMake(p_origin.x, p_origin.y, p_origin.z); +} + +// Clamps the size so that the sum of the origin and size do not exceed the maximum size. +static inline MTLSize clampMTLSize(MTLSize p_size, MTLOrigin p_origin, MTLSize p_max_size) { + MTLSize clamped; + clamped.width = MIN(p_size.width, p_max_size.width - p_origin.x); + clamped.height = MIN(p_size.height, p_max_size.height - p_origin.y); + clamped.depth = MIN(p_size.depth, p_max_size.depth - p_origin.z); + return clamped; +} + +API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) +static bool isArrayTexture(MTLTextureType p_type) { + return (p_type == MTLTextureType3D || + p_type == MTLTextureType2DArray || + p_type == MTLTextureType2DMultisampleArray || + p_type == MTLTextureType1DArray); +} + +_FORCE_INLINE_ static bool operator==(MTLSize p_a, MTLSize p_b) { + return p_a.width == p_b.width && p_a.height == p_b.height && p_a.depth == p_b.depth; +} + +void MDCommandBuffer::copy_texture(RDD::TextureID p_src_texture, RDD::TextureID p_dst_texture, VectorView p_regions) { + id src = rid::get(p_src_texture); + id dst = rid::get(p_dst_texture); + + id enc = _ensure_blit_encoder(); + PixelFormats &pf = device_driver->get_pixel_formats(); + + MTLPixelFormat src_fmt = src.pixelFormat; + bool src_is_compressed = pf.getFormatType(src_fmt) == MTLFormatType::Compressed; + MTLPixelFormat dst_fmt = dst.pixelFormat; + bool dst_is_compressed = pf.getFormatType(dst_fmt) == MTLFormatType::Compressed; + + // Validate copy. + if (src.sampleCount != dst.sampleCount || pf.getBytesPerBlock(src_fmt) != pf.getBytesPerBlock(dst_fmt)) { + ERR_FAIL_MSG("Cannot copy between incompatible pixel formats, such as formats of different pixel sizes, or between images with different sample counts."); + } + + // If source and destination have different formats and at least one is compressed, a temporary buffer is required. 
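+ // Such a copy would have to stage through an intermediate buffer, since a
+ // texture view cannot reinterpret between these formats; that path is not
+ // implemented and fails below.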
+ bool need_tmp_buffer = (src_fmt != dst_fmt) && (src_is_compressed || dst_is_compressed); + if (need_tmp_buffer) { + ERR_FAIL_MSG("not implemented: copy with intermediate buffer"); + } + + if (src_fmt != dst_fmt) { + // Map the source pixel format to the destination format through a texture view on the source texture. + src = [src newTextureViewWithPixelFormat:dst_fmt]; + } + + for (uint32_t i = 0; i < p_regions.size(); i++) { + RDD::TextureCopyRegion region = p_regions[i]; + + MTLSize extent = MTLSizeFromVector3i(region.size); + + // If copies can be performed using direct texture-texture copying, do so. + uint32_t src_level = region.src_subresources.mipmap; + uint32_t src_base_layer = region.src_subresources.base_layer; + MTLSize src_extent = mipmapLevelSizeFromTexture(src, src_level); + uint32_t dst_level = region.dst_subresources.mipmap; + uint32_t dst_base_layer = region.dst_subresources.base_layer; + MTLSize dst_extent = mipmapLevelSizeFromTexture(dst, dst_level); + + // All layers may be copied at once, if the extent completely covers both images. + if (src_extent == extent && dst_extent == extent) { + [enc copyFromTexture:src + sourceSlice:src_base_layer + sourceLevel:src_level + toTexture:dst + destinationSlice:dst_base_layer + destinationLevel:dst_level + sliceCount:region.src_subresources.layer_count + levelCount:1]; + } else { + MTLOrigin src_origin = MTLOriginFromVector3i(region.src_offset); + MTLSize src_size = clampMTLSize(extent, src_origin, src_extent); + uint32_t layer_count = 0; + if ((src.textureType == MTLTextureType3D) != (dst.textureType == MTLTextureType3D)) { + // In this case, the number of layers to copy is stored in extent.depth. Use that value, + // then clamp the depth, so we don't try to copy more than Metal will allow. + layer_count = extent.depth; + src_size.depth = 1; + } else { + layer_count = region.src_subresources.layer_count; + } + MTLOrigin dst_origin = MTLOriginFromVector3i(region.dst_offset); + + for (uint32_t layer = 0; layer < layer_count; layer++) { + // We can copy between a 3D and a 2D image easily. Just copy between + // one slice of the 2D image and one plane of the 3D image at a time.
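+ // Three cases follow: source and destination share the same dimensionality, planes are copied out of a 3D source, or slices are copied into a 3D destination.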
+ if ((src.textureType == MTLTextureType3D) == (dst.textureType == MTLTextureType3D)) { + [enc copyFromTexture:src + sourceSlice:src_base_layer + layer + sourceLevel:src_level + sourceOrigin:src_origin + sourceSize:src_size + toTexture:dst + destinationSlice:dst_base_layer + layer + destinationLevel:dst_level + destinationOrigin:dst_origin]; + } else if (src.textureType == MTLTextureType3D) { + [enc copyFromTexture:src + sourceSlice:src_base_layer + sourceLevel:src_level + sourceOrigin:MTLOriginMake(src_origin.x, src_origin.y, src_origin.z + layer) + sourceSize:src_size + toTexture:dst + destinationSlice:dst_base_layer + layer + destinationLevel:dst_level + destinationOrigin:dst_origin]; + } else { + DEV_ASSERT(dst.textureType == MTLTextureType3D); + [enc copyFromTexture:src + sourceSlice:src_base_layer + layer + sourceLevel:src_level + sourceOrigin:src_origin + sourceSize:src_size + toTexture:dst + destinationSlice:dst_base_layer + destinationLevel:dst_level + destinationOrigin:MTLOriginMake(dst_origin.x, dst_origin.y, dst_origin.z + layer)]; + } + } + } + } +} + +void MDCommandBuffer::copy_buffer_to_texture(RDD::BufferID p_src_buffer, RDD::TextureID p_dst_texture, VectorView p_regions) { + _copy_texture_buffer(CopySource::Buffer, p_dst_texture, p_src_buffer, p_regions); +} + +void MDCommandBuffer::copy_texture_to_buffer(RDD::TextureID p_src_texture, RDD::BufferID p_dst_buffer, VectorView p_regions) { + _copy_texture_buffer(CopySource::Texture, p_src_texture, p_dst_buffer, p_regions); +} + +void MDCommandBuffer::_copy_texture_buffer(CopySource p_source, + RDD::TextureID p_texture, + RDD::BufferID p_buffer, + VectorView p_regions) { + const RDM::BufferInfo *buffer = (const RDM::BufferInfo *)p_buffer.id; + id texture = rid::get(p_texture); + + id enc = _ensure_blit_encoder(); + + PixelFormats &pf = device_driver->get_pixel_formats(); + MTLPixelFormat mtlPixFmt = texture.pixelFormat; + + MTLBlitOption options = MTLBlitOptionNone; + if (pf.isPVRTCFormat(mtlPixFmt)) { + options |= MTLBlitOptionRowLinearPVRTC; + } + + for (uint32_t i = 0; i < p_regions.size(); i++) { + RDD::BufferTextureCopyRegion region = p_regions[i]; + + uint32_t mip_level = region.texture_subresources.mipmap; + MTLOrigin txt_origin = MTLOriginMake(region.texture_offset.x, region.texture_offset.y, region.texture_offset.z); + MTLSize src_extent = mipmapLevelSizeFromTexture(texture, mip_level); + MTLSize txt_size = clampMTLSize(MTLSizeMake(region.texture_region_size.x, region.texture_region_size.y, region.texture_region_size.z), + txt_origin, + src_extent); + + uint32_t buffImgWd = region.texture_region_size.x; + uint32_t buffImgHt = region.texture_region_size.y; + + NSUInteger bytesPerRow = pf.getBytesPerRow(mtlPixFmt, buffImgWd); + NSUInteger bytesPerImg = pf.getBytesPerLayer(mtlPixFmt, bytesPerRow, buffImgHt); + + MTLBlitOption blit_options = options; + + if (pf.isDepthFormat(mtlPixFmt) && pf.isStencilFormat(mtlPixFmt)) { + bool want_depth = flags::all(region.texture_subresources.aspect, RDD::TEXTURE_ASPECT_DEPTH_BIT); + bool want_stencil = flags::all(region.texture_subresources.aspect, RDD::TEXTURE_ASPECT_STENCIL_BIT); + + // The stencil component is always 1 byte per pixel. + // Don't reduce depths of 32-bit depth/stencil formats. 
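+ // For a depth-only copy the stencil byte is dropped from the strides; for a stencil-only copy the strides shrink to one byte per pixel.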
+ if (want_depth && !want_stencil) { + if (pf.getBytesPerTexel(mtlPixFmt) != 4) { + bytesPerRow -= buffImgWd; + bytesPerImg -= buffImgWd * buffImgHt; + } + blit_options |= MTLBlitOptionDepthFromDepthStencil; + } else if (want_stencil && !want_depth) { + bytesPerRow = buffImgWd; + bytesPerImg = buffImgWd * buffImgHt; + blit_options |= MTLBlitOptionStencilFromDepthStencil; + } + } + + if (!isArrayTexture(texture.textureType)) { + bytesPerImg = 0; + } + + if (p_source == CopySource::Buffer) { + for (uint32_t lyrIdx = 0; lyrIdx < region.texture_subresources.layer_count; lyrIdx++) { + [enc copyFromBuffer:buffer->metal_buffer + sourceOffset:region.buffer_offset + (bytesPerImg * lyrIdx) + sourceBytesPerRow:bytesPerRow + sourceBytesPerImage:bytesPerImg + sourceSize:txt_size + toTexture:texture + destinationSlice:region.texture_subresources.base_layer + lyrIdx + destinationLevel:mip_level + destinationOrigin:txt_origin + options:blit_options]; + } + } else { + for (uint32_t lyrIdx = 0; lyrIdx < region.texture_subresources.layer_count; lyrIdx++) { + [enc copyFromTexture:texture + sourceSlice:region.texture_subresources.base_layer + lyrIdx + sourceLevel:mip_level + sourceOrigin:txt_origin + sourceSize:txt_size + toBuffer:buffer->metal_buffer + destinationOffset:region.buffer_offset + (bytesPerImg * lyrIdx) + destinationBytesPerRow:bytesPerRow + destinationBytesPerImage:bytesPerImg + options:blit_options]; + } + } + } +} + void MDCommandBuffer::encodeRenderCommandEncoderWithDescriptor(MTLRenderPassDescriptor *p_desc, NSString *p_label) { switch (type) { case MDCommandBufferStateType::None: @@ -358,6 +723,7 @@ [enc popDebugGroup]; render.dirty.set_flag((RenderState::DirtyFlag)(RenderState::DIRTY_PIPELINE | RenderState::DIRTY_DEPTH | RenderState::DIRTY_RASTER)); + binding_cache.clear(); render.mark_uniforms_dirty({ 0 }); // Mark index 0 dirty, if there is already a binding for index 0. render.mark_viewport_dirty(); render.mark_scissors_dirty(); @@ -369,15 +735,13 @@ _render_bind_uniform_sets(); if (render.dirty.has_flag(RenderState::DIRTY_PUSH)) { - if (render.push_constant_bindings[0] != (uint32_t)-1) { - [render.encoder setVertexBytes:render.push_constant_data - length:render.push_constant_data_len - atIndex:render.push_constant_bindings[0]]; - } - if (render.push_constant_bindings[1] != (uint32_t)-1) { - [render.encoder setFragmentBytes:render.push_constant_data - length:render.push_constant_data_len - atIndex:render.push_constant_bindings[1]]; + if (push_constant_binding != UINT32_MAX) { + [render.encoder setVertexBytes:push_constant_data + length:push_constant_data_len + atIndex:push_constant_binding]; + [render.encoder setFragmentBytes:push_constant_data + length:push_constant_data_len + atIndex:push_constant_binding]; } } @@ -425,6 +789,8 @@ withRange:NSMakeRange(first, p_binding_count)]; } + render.resource_tracker.encode(render.encoder); + render.dirty.clear(); } @@ -468,24 +834,33 @@ } } -void BoundUniformSet::merge_into(ResourceUsageMap &p_dst) const { - for (KeyValue const &keyval : usage_to_resources) { - ResourceVector *resources = p_dst.getptr(keyval.key); +void ResourceTracker::merge_from(const ResourceUsageMap &p_from) { + for (KeyValue const &keyval : p_from) { + ResourceVector *resources = _current.getptr(keyval.key); if (resources == nullptr) { - resources = &p_dst.insert(keyval.key, ResourceVector())->value; + resources = &_current.insert(keyval.key, ResourceVector())->value; } // Reserve space for the new resources, assuming they are all added. 
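+ // A sorted two-way merge follows; _previous remembers what was already encoded, so resources whose usage is unchanged are not re-added.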
resources->reserve(resources->size() + keyval.value.size()); uint32_t i = 0, j = 0; - __unsafe_unretained id *resources_ptr = resources->ptr(); - const __unsafe_unretained id *keyval_ptr = keyval.value.ptr(); + MTLResourceUnsafe *resources_ptr = resources->ptr(); + const MTLResourceUnsafe *keyval_ptr = keyval.value.ptr(); // 2-way merge. while (i < resources->size() && j < keyval.value.size()) { if (resources_ptr[i] < keyval_ptr[j]) { i++; } else if (resources_ptr[i] > keyval_ptr[j]) { - resources->insert(i, keyval_ptr[j]); + ResourceUsageEntry *existing = nullptr; + if ((existing = _previous.getptr(keyval_ptr[j])) == nullptr) { + existing = &_previous.insert(keyval_ptr[j], keyval.key)->value; + resources->insert(i, keyval_ptr[j]); + } else { + if (existing->usage != keyval.key) { + existing->usage |= keyval.key; + resources->insert(i, keyval_ptr[j]); + } + } i++; j++; } else { @@ -495,9 +870,82 @@ } // Append the remaining resources. for (; j < keyval.value.size(); j++) { - resources->push_back(keyval_ptr[j]); + ResourceUsageEntry *existing = nullptr; + if ((existing = _previous.getptr(keyval_ptr[j])) == nullptr) { + existing = &_previous.insert(keyval_ptr[j], keyval.key)->value; + resources->push_back(keyval_ptr[j]); + } else { + if (existing->usage != keyval.key) { + existing->usage |= keyval.key; + resources->push_back(keyval_ptr[j]); + } + } + } + } +} + +void ResourceTracker::encode(id __unsafe_unretained p_enc) { + for (KeyValue const &keyval : _current) { + if (keyval.value.is_empty()) { + continue; + } + + MTLResourceUsage vert_usage = resource_usage_for_stage(keyval.key, RDD::ShaderStage::SHADER_STAGE_VERTEX); + MTLResourceUsage frag_usage = resource_usage_for_stage(keyval.key, RDD::ShaderStage::SHADER_STAGE_FRAGMENT); + if (vert_usage == frag_usage) { + [p_enc useResources:keyval.value.ptr() count:keyval.value.size() usage:vert_usage stages:MTLRenderStageVertex | MTLRenderStageFragment]; + } else { + if (vert_usage != 0) { + [p_enc useResources:keyval.value.ptr() count:keyval.value.size() usage:vert_usage stages:MTLRenderStageVertex]; + } + if (frag_usage != 0) { + [p_enc useResources:keyval.value.ptr() count:keyval.value.size() usage:frag_usage stages:MTLRenderStageFragment]; + } + } + } + + // Keep the keys for now and clear the vectors to reduce churn. + for (KeyValue &v : _current) { + v.value.clear(); + } +} + +void ResourceTracker::encode(id __unsafe_unretained p_enc) { + for (KeyValue const &keyval : _current) { + if (keyval.value.is_empty()) { + continue; + } + MTLResourceUsage usage = resource_usage_for_stage(keyval.key, RDD::ShaderStage::SHADER_STAGE_COMPUTE); + if (usage != 0) { + [p_enc useResources:keyval.value.ptr() count:keyval.value.size() usage:usage]; + } + } + + // Keep the keys for now and clear the vectors to reduce churn. + for (KeyValue &v : _current) { + v.value.clear(); + } +} + +void ResourceTracker::reset() { + // Keep the keys for now, as they are likely to be used repeatedly. + for (KeyValue &v : _previous) { + if (v.value.usage == ResourceUnused) { + v.value.unused++; + if (v.value.unused >= RESOURCE_UNUSED_CLEANUP_COUNT) { + _scratch.push_back(v.key); + } + } else { + v.value = ResourceUnused; + v.value.unused = 0; } } + + // Clean up resources that weren't used in the last pass.
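+ // Entries that stayed unused for RESOURCE_UNUSED_CLEANUP_COUNT consecutive resets were queued in _scratch above.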
+ for (const MTLResourceUnsafe &res : _scratch) { + _previous.erase(res); + } + _scratch.clear(); } void MDCommandBuffer::_render_bind_uniform_sets() { @@ -522,7 +970,12 @@ if (set == nullptr || index >= (uint32_t)shader->sets.size()) { continue; } - set->bind_uniforms(shader, render, index, dynamic_offsets, device_driver->frame_index, device_driver->frame_count); + if (shader->uses_argument_buffers) { + set->bind_uniforms_argument_buffers(shader, render, index, dynamic_offsets, device_driver->frame_index(), device_driver->frame_count()); + } else { + DirectEncoder de(render.encoder, binding_cache); + set->bind_uniforms_direct(shader, de, index, dynamic_offsets); + } } } @@ -917,7 +1370,7 @@ render.end_encoding(); render.reset(); - type = MDCommandBufferStateType::None; + reset(); } #pragma mark - RenderState @@ -937,19 +1390,13 @@ uniform_sets.clear(); dynamic_offsets = 0; uniform_set_mask = 0; - push_constant_bindings[0] = ~0U; - push_constant_bindings[1] = ~0U; - push_constant_data_len = 0; clear_values.clear(); viewports.clear(); scissors.clear(); blend_constants.reset(); vertex_buffers.clear(); vertex_offsets.clear(); - // Keep the keys, as they are likely to be used again. - for (KeyValue>> &kv : resource_usage) { - kv.value.clear(); - } + resource_tracker.reset(); } void MDCommandBuffer::RenderState::end_encoding() { @@ -957,26 +1404,6 @@ return; } - // Bind all resources. - for (KeyValue const &keyval : resource_usage) { - if (keyval.value.is_empty()) { - continue; - } - - MTLResourceUsage vert_usage = resource_usage_for_stage(keyval.key, RDD::ShaderStage::SHADER_STAGE_VERTEX); - MTLResourceUsage frag_usage = resource_usage_for_stage(keyval.key, RDD::ShaderStage::SHADER_STAGE_FRAGMENT); - if (vert_usage == frag_usage) { - [encoder useResources:keyval.value.ptr() count:keyval.value.size() usage:vert_usage stages:MTLRenderStageVertex | MTLRenderStageFragment]; - } else { - if (vert_usage != 0) { - [encoder useResources:keyval.value.ptr() count:keyval.value.size() usage:vert_usage stages:MTLRenderStageVertex]; - } - if (frag_usage != 0) { - [encoder useResources:keyval.value.ptr() count:keyval.value.size() usage:frag_usage stages:MTLRenderStageFragment]; - } - } - } - [encoder endEncoding]; encoder = nil; } @@ -988,17 +1415,6 @@ return; } - // Bind all resources. 
- for (KeyValue const &keyval : resource_usage) { - if (keyval.value.is_empty()) { - continue; - } - MTLResourceUsage usage = resource_usage_for_stage(keyval.key, RDD::ShaderStage::SHADER_STAGE_COMPUTE); - if (usage != 0) { - [encoder useResources:keyval.value.ptr() count:keyval.value.size() usage:usage]; - } - } - [encoder endEncoding]; encoder = nil; } @@ -1014,13 +1430,15 @@ _compute_bind_uniform_sets(); if (compute.dirty.has_flag(ComputeState::DIRTY_PUSH)) { - if (compute.push_constant_bindings[0] != (uint32_t)-1) { - [compute.encoder setBytes:compute.push_constant_data - length:compute.push_constant_data_len - atIndex:compute.push_constant_bindings[0]]; + if (push_constant_binding != UINT32_MAX) { + [compute.encoder setBytes:push_constant_data + length:push_constant_data_len + atIndex:push_constant_binding]; } } + compute.resource_tracker.encode(compute.encoder); + compute.dirty.clear(); } @@ -1046,7 +1464,12 @@ if (set == nullptr || index >= (uint32_t)shader->sets.size()) { continue; } - set->bind_uniforms(shader, compute, index, dynamic_offsets, device_driver->frame_index, device_driver->frame_count); + if (shader->uses_argument_buffers) { + set->bind_uniforms_argument_buffers(shader, compute, index, dynamic_offsets, device_driver->frame_index(), device_driver->frame_count()); + } else { + DirectEncoder de(compute.encoder, binding_cache); + set->bind_uniforms_direct(shader, de, index, dynamic_offsets); + } } } @@ -1057,12 +1480,7 @@ uniform_sets.clear(); dynamic_offsets = 0; uniform_set_mask = 0; - push_constant_bindings[0] = ~0U; - push_constant_data_len = 0; - // Keep the keys, as they are likely to be used again. - for (KeyValue>> &kv : resource_usage) { - kv.value.clear(); - } + resource_tracker.reset(); } void MDCommandBuffer::compute_bind_uniform_sets(VectorView p_uniform_sets, RDD::ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count, uint32_t p_dynamic_offsets) { @@ -1114,12 +1532,17 @@ [enc dispatchThreadgroupsWithIndirectBuffer:indirectBuffer->metal_buffer indirectBufferOffset:p_offset threadsPerThreadgroup:compute.pipeline->compute_state.local]; } +void MDCommandBuffer::reset() { + push_constant_data_len = 0; + type = MDCommandBufferStateType::None; +} + void MDCommandBuffer::_end_compute_dispatch() { DEV_ASSERT(type == MDCommandBufferStateType::Compute); compute.end_encoding(); compute.reset(); - type = MDCommandBufferStateType::None; + reset(); } void MDCommandBuffer::_end_blit() { @@ -1127,7 +1550,7 @@ [blit.encoder endEncoding]; blit.reset(); - type = MDCommandBufferStateType::None; + reset(); } MDComputeShader::MDComputeShader(CharString p_name, @@ -1148,256 +1571,96 @@ frag(p_frag) { } -void MDUniformSet::bind_uniforms_argument_buffers(MDShader *p_shader, MDCommandBuffer::RenderState &p_state, uint32_t p_set_index, uint32_t p_dynamic_offsets, uint32_t p_frame_idx, uint32_t p_frame_count) { - DEV_ASSERT(p_shader->uses_argument_buffers); - DEV_ASSERT(p_state.encoder != nil); - - UniformSet const &set_info = p_shader->sets[p_set_index]; - - id __unsafe_unretained enc = p_state.encoder; - id __unsafe_unretained device = enc.device; - - BoundUniformSet &bus = bound_uniform_set(p_shader, device, p_state.resource_usage, p_set_index, p_dynamic_offsets, p_frame_idx, p_frame_count); - - // Set the buffer for the vertex stage. 
- { - uint32_t const *offset = set_info.offsets.getptr(RDD::SHADER_STAGE_VERTEX); - if (offset) { - [enc setVertexBuffer:bus.buffer offset:bus.make_offset(p_frame_idx, *offset) atIndex:p_set_index]; - } - } - // Set the buffer for the fragment stage. - { - uint32_t const *offset = set_info.offsets.getptr(RDD::SHADER_STAGE_FRAGMENT); - if (offset) { - [enc setFragmentBuffer:bus.buffer offset:bus.make_offset(p_frame_idx, *offset) atIndex:p_set_index]; +void DirectEncoder::set(__unsafe_unretained id *p_textures, NSRange p_range) { + if (cache.update(p_range, p_textures)) { + switch (mode) { + case RENDER: { + id __unsafe_unretained enc = (id)encoder; + [enc setVertexTextures:p_textures withRange:p_range]; + [enc setFragmentTextures:p_textures withRange:p_range]; + } break; + case COMPUTE: { + id __unsafe_unretained enc = (id)encoder; + [enc setTextures:p_textures withRange:p_range]; + } break; } } } -void MDUniformSet::bind_uniforms_direct(MDShader *p_shader, MDCommandBuffer::RenderState &p_state, uint32_t p_set_index, uint32_t p_dynamic_offsets) { - DEV_ASSERT(!p_shader->uses_argument_buffers); - DEV_ASSERT(p_state.encoder != nil); - - id __unsafe_unretained enc = p_state.encoder; - - UniformSet const &set = p_shader->sets[p_set_index]; - DynamicOffsetLayout layout = p_shader->dynamic_offset_layout; - uint32_t dynamic_index = 0; - - for (uint32_t i = 0; i < MIN(uniforms.size(), set.uniforms.size()); i++) { - RDD::BoundUniform const &uniform = uniforms[i]; - const UniformInfo &ui = set.uniforms[i]; - - uint32_t frame_idx; - if (uniform.is_dynamic()) { - uint32_t shift = layout.get_offset_index_shift(p_set_index, dynamic_index); - dynamic_index++; - frame_idx = (p_dynamic_offsets >> shift) & 0xf; - } else { - frame_idx = 0; +void DirectEncoder::set(__unsafe_unretained id *p_buffers, const NSUInteger *p_offsets, NSRange p_range) { + if (cache.update(p_range, p_buffers, p_offsets)) { + switch (mode) { + case RENDER: { + id __unsafe_unretained enc = (id)encoder; + [enc setVertexBuffers:p_buffers offsets:p_offsets withRange:p_range]; + [enc setFragmentBuffers:p_buffers offsets:p_offsets withRange:p_range]; + } break; + case COMPUTE: { + id __unsafe_unretained enc = (id)encoder; + [enc setBuffers:p_buffers offsets:p_offsets withRange:p_range]; + } break; } + } +} - static const RDC::ShaderStage stage_usages[2] = { RDC::ShaderStage::SHADER_STAGE_VERTEX, RDC::ShaderStage::SHADER_STAGE_FRAGMENT }; - for (const RDC::ShaderStage stage : stage_usages) { - ShaderStageUsage const stage_usage = ShaderStageUsage(1 << stage); - - const BindingInfo *bi = ui.bindings.getptr(stage); - if (bi == nullptr || (ui.active_stages & stage_usage) == 0) { - // No binding for this stage or it is not active - continue; - } - - switch (uniform.type) { - case RDD::UNIFORM_TYPE_SAMPLER: { - size_t count = uniform.ids.size(); - id __unsafe_unretained *objects = ALLOCA_ARRAY(id __unsafe_unretained, count); - for (size_t j = 0; j < count; j += 1) { - objects[j] = rid::get(uniform.ids[j].id); - } - if (stage == RDD::SHADER_STAGE_VERTEX) { - [enc setVertexSamplerStates:objects withRange:NSMakeRange(bi->index, count)]; - } else { - [enc setFragmentSamplerStates:objects withRange:NSMakeRange(bi->index, count)]; - } - } break; - case RDD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE: { - size_t count = uniform.ids.size() / 2; - id __unsafe_unretained *textures = ALLOCA_ARRAY(id __unsafe_unretained, count); - id __unsafe_unretained *samplers = ALLOCA_ARRAY(id __unsafe_unretained, count); - for (uint32_t j = 0; j < count; j += 1) { - id sampler = 
rid::get(uniform.ids[j * 2 + 0]); - id texture = rid::get(uniform.ids[j * 2 + 1]); - samplers[j] = sampler; - textures[j] = texture; - } - const BindingInfo *sbi = ui.bindings_secondary.getptr(stage); - if (sbi) { - if (stage == RDD::SHADER_STAGE_VERTEX) { - [enc setVertexSamplerStates:samplers withRange:NSMakeRange(sbi->index, count)]; - } else { - [enc setFragmentSamplerStates:samplers withRange:NSMakeRange(sbi->index, count)]; - } - } - if (stage == RDD::SHADER_STAGE_VERTEX) { - [enc setVertexTextures:textures withRange:NSMakeRange(bi->index, count)]; - } else { - [enc setFragmentTextures:textures withRange:NSMakeRange(bi->index, count)]; - } - } break; - case RDD::UNIFORM_TYPE_TEXTURE: { - size_t count = uniform.ids.size(); - if (count == 1) { - id obj = rid::get(uniform.ids[0]); - if (stage == RDD::SHADER_STAGE_VERTEX) { - [enc setVertexTexture:obj atIndex:bi->index]; - } else { - [enc setFragmentTexture:obj atIndex:bi->index]; - } - } else { - id __unsafe_unretained *objects = ALLOCA_ARRAY(id __unsafe_unretained, count); - for (size_t j = 0; j < count; j += 1) { - id obj = rid::get(uniform.ids[j]); - objects[j] = obj; - } - if (stage == RDD::SHADER_STAGE_VERTEX) { - [enc setVertexTextures:objects withRange:NSMakeRange(bi->index, count)]; - } else { - [enc setFragmentTextures:objects withRange:NSMakeRange(bi->index, count)]; - } - } - } break; - case RDD::UNIFORM_TYPE_IMAGE: { - size_t count = uniform.ids.size(); - if (count == 1) { - id obj = rid::get(uniform.ids[0]); - if (stage == RDD::SHADER_STAGE_VERTEX) { - [enc setVertexTexture:obj atIndex:bi->index]; - } else { - [enc setFragmentTexture:obj atIndex:bi->index]; - } - - const BindingInfo *sbi = ui.bindings_secondary.getptr(stage); - if (sbi) { - id tex = obj.parentTexture ? obj.parentTexture : obj; - id buf = tex.buffer; - if (buf) { - if (stage == RDD::SHADER_STAGE_VERTEX) { - [enc setVertexBuffer:buf offset:tex.bufferOffset atIndex:sbi->index]; - } else { - [enc setFragmentBuffer:buf offset:tex.bufferOffset atIndex:sbi->index]; - } - } - } - } else { - id __unsafe_unretained *objects = ALLOCA_ARRAY(id __unsafe_unretained, count); - for (size_t j = 0; j < count; j += 1) { - id obj = rid::get(uniform.ids[j]); - objects[j] = obj; - } - if (stage == RDD::SHADER_STAGE_VERTEX) { - [enc setVertexTextures:objects withRange:NSMakeRange(bi->index, count)]; - } else { - [enc setFragmentTextures:objects withRange:NSMakeRange(bi->index, count)]; - } - } - } break; - case RDD::UNIFORM_TYPE_TEXTURE_BUFFER: { - ERR_PRINT("not implemented: UNIFORM_TYPE_TEXTURE_BUFFER"); - } break; - case RDD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE_BUFFER: { - ERR_PRINT("not implemented: UNIFORM_TYPE_SAMPLER_WITH_TEXTURE_BUFFER"); - } break; - case RDD::UNIFORM_TYPE_IMAGE_BUFFER: { - CRASH_NOW_MSG("not implemented: UNIFORM_TYPE_IMAGE_BUFFER"); - } break; - case RDD::UNIFORM_TYPE_UNIFORM_BUFFER: - case RDD::UNIFORM_TYPE_STORAGE_BUFFER: { - const RenderingDeviceDriverMetal::BufferInfo *buf_info = (const RenderingDeviceDriverMetal::BufferInfo *)uniform.ids[0].id; - if (stage == RDD::SHADER_STAGE_VERTEX) { - [enc setVertexBuffer:buf_info->metal_buffer offset:0 atIndex:bi->index]; - } else { - [enc setFragmentBuffer:buf_info->metal_buffer offset:0 atIndex:bi->index]; - } - } break; - case RDD::UNIFORM_TYPE_UNIFORM_BUFFER_DYNAMIC: - case RDD::UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC: { - const MetalBufferDynamicInfo *buf_info = (const MetalBufferDynamicInfo *)uniform.ids[0].id; - if (stage == RDD::SHADER_STAGE_VERTEX) { - [enc setVertexBuffer:buf_info->metal_buffer 
offset:frame_idx * buf_info->size_bytes atIndex:bi->index]; - } else { - [enc setFragmentBuffer:buf_info->metal_buffer offset:frame_idx * buf_info->size_bytes atIndex:bi->index]; - } - } break; - case RDD::UNIFORM_TYPE_INPUT_ATTACHMENT: { - size_t count = uniform.ids.size(); - if (count == 1) { - id obj = rid::get(uniform.ids[0]); - if (stage == RDD::SHADER_STAGE_VERTEX) { - [enc setVertexTexture:obj atIndex:bi->index]; - } else { - [enc setFragmentTexture:obj atIndex:bi->index]; - } - } else { - id __unsafe_unretained *objects = ALLOCA_ARRAY(id __unsafe_unretained, count); - for (size_t j = 0; j < count; j += 1) { - id obj = rid::get(uniform.ids[j]); - objects[j] = obj; - } - - if (stage == RDD::SHADER_STAGE_VERTEX) { - [enc setVertexTextures:objects withRange:NSMakeRange(bi->index, count)]; - } else { - [enc setFragmentTextures:objects withRange:NSMakeRange(bi->index, count)]; - } - } - } break; - default: { - DEV_ASSERT(false); - } - } +void DirectEncoder::set(id __unsafe_unretained p_buffer, const NSUInteger p_offset, uint32_t p_index) { + if (cache.update(p_buffer, p_offset, p_index)) { + switch (mode) { + case RENDER: { + id __unsafe_unretained enc = (id)encoder; + [enc setVertexBuffer:p_buffer offset:p_offset atIndex:p_index]; + [enc setFragmentBuffer:p_buffer offset:p_offset atIndex:p_index]; + } break; + case COMPUTE: { + id __unsafe_unretained enc = (id)encoder; + [enc setBuffer:p_buffer offset:p_offset atIndex:p_index]; + } break; } } } -void MDUniformSet::bind_uniforms(MDShader *p_shader, MDCommandBuffer::RenderState &p_state, uint32_t p_set_index, uint32_t p_dynamic_offsets, uint32_t p_frame_idx, uint32_t p_frame_count) { - if (p_shader->uses_argument_buffers) { - bind_uniforms_argument_buffers(p_shader, p_state, p_set_index, p_dynamic_offsets, p_frame_idx, p_frame_count); - } else { - bind_uniforms_direct(p_shader, p_state, p_set_index, p_dynamic_offsets); +void DirectEncoder::set(__unsafe_unretained id *p_samplers, NSRange p_range) { + if (cache.update(p_range, p_samplers)) { + switch (mode) { + case RENDER: { + id __unsafe_unretained enc = (id)encoder; + [enc setVertexSamplerStates:p_samplers withRange:p_range]; + [enc setFragmentSamplerStates:p_samplers withRange:p_range]; + } break; + case COMPUTE: { + id __unsafe_unretained enc = (id)encoder; + [enc setSamplerStates:p_samplers withRange:p_range]; + } break; + } } } -void MDUniformSet::bind_uniforms_argument_buffers(MDShader *p_shader, MDCommandBuffer::ComputeState &p_state, uint32_t p_set_index, uint32_t p_dynamic_offsets, uint32_t p_frame_idx, uint32_t p_frame_count) { +void MDUniformSet::bind_uniforms_argument_buffers(MDShader *p_shader, MDCommandBuffer::RenderState &p_state, uint32_t p_set_index, uint32_t p_dynamic_offsets, uint32_t p_frame_idx, uint32_t p_frame_count) { DEV_ASSERT(p_shader->uses_argument_buffers); DEV_ASSERT(p_state.encoder != nil); + DEV_ASSERT(p_shader->dynamic_offset_layout.is_empty()); // Argument buffers do not support dynamic offsets. 
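+ // The argument buffer was encoded ahead of time, so binding reduces to merging resource usage into the tracker and setting one buffer per stage.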
- UniformSet const &set_info = p_shader->sets[p_set_index]; - - id enc = p_state.encoder; - id device = enc.device; + id __unsafe_unretained enc = p_state.encoder; - BoundUniformSet &bus = bound_uniform_set(p_shader, device, p_state.resource_usage, p_set_index, p_dynamic_offsets, p_frame_idx, p_frame_count); + p_state.resource_tracker.merge_from(usage_to_resources); - uint32_t const *offset = set_info.offsets.getptr(RDD::SHADER_STAGE_COMPUTE); - if (offset) { - [enc setBuffer:bus.buffer offset:bus.make_offset(p_frame_idx, *offset) atIndex:p_set_index]; - } + [enc setVertexBuffer:arg_buffer + offset:0 + atIndex:p_set_index]; + [enc setFragmentBuffer:arg_buffer offset:0 atIndex:p_set_index]; } -void MDUniformSet::bind_uniforms_direct(MDShader *p_shader, MDCommandBuffer::ComputeState &p_state, uint32_t p_set_index, uint32_t p_dynamic_offsets) { +void MDUniformSet::bind_uniforms_direct(MDShader *p_shader, DirectEncoder p_enc, uint32_t p_set_index, uint32_t p_dynamic_offsets) { DEV_ASSERT(!p_shader->uses_argument_buffers); - DEV_ASSERT(p_state.encoder != nil); - - id __unsafe_unretained enc = p_state.encoder; UniformSet const &set = p_shader->sets[p_set_index]; DynamicOffsetLayout layout = p_shader->dynamic_offset_layout; uint32_t dynamic_index = 0; - for (uint32_t i = 0; i < uniforms.size(); i++) { + for (uint32_t i = 0; i < MIN(uniforms.size(), set.uniforms.size()); i++) { RDD::BoundUniform const &uniform = uniforms[i]; const UniformInfo &ui = set.uniforms[i]; + const UniformInfo::Indexes &indexes = ui.slot; uint32_t frame_idx; if (uniform.is_dynamic()) { @@ -1408,15 +1671,6 @@ frame_idx = 0; } - const RDC::ShaderStage stage = RDC::ShaderStage::SHADER_STAGE_COMPUTE; - const ShaderStageUsage stage_usage = ShaderStageUsage(1 << stage); - - const BindingInfo *bi = ui.bindings.getptr(stage); - if (bi == nullptr || (ui.active_stages & stage_usage) == 0) { - // No binding for this stage. 
- continue; - } - switch (uniform.type) { case RDD::UNIFORM_TYPE_SAMPLER: { size_t count = uniform.ids.size(); @@ -1424,7 +1678,8 @@ for (size_t j = 0; j < count; j += 1) { objects[j] = rid::get(uniform.ids[j].id); } - [enc setSamplerStates:objects withRange:NSMakeRange(bi->index, count)]; + NSRange sampler_range = NSMakeRange(indexes.sampler, count); + p_enc.set(objects, sampler_range); } break; case RDD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE: { size_t count = uniform.ids.size() / 2; @@ -1436,47 +1691,44 @@ samplers[j] = sampler; textures[j] = texture; } - const BindingInfo *sbi = ui.bindings_secondary.getptr(stage); - if (sbi) { - [enc setSamplerStates:samplers withRange:NSMakeRange(sbi->index, count)]; - } - [enc setTextures:textures withRange:NSMakeRange(bi->index, count)]; + NSRange sampler_range = NSMakeRange(indexes.sampler, count); + NSRange texture_range = NSMakeRange(indexes.texture, count); + p_enc.set(samplers, sampler_range); + p_enc.set(textures, texture_range); } break; case RDD::UNIFORM_TYPE_TEXTURE: { size_t count = uniform.ids.size(); - if (count == 1) { - id obj = rid::get(uniform.ids[0]); - [enc setTexture:obj atIndex:bi->index]; - } else { - id __unsafe_unretained *objects = ALLOCA_ARRAY(id __unsafe_unretained, count); - for (size_t j = 0; j < count; j += 1) { - id obj = rid::get(uniform.ids[j]); - objects[j] = obj; - } - [enc setTextures:objects withRange:NSMakeRange(bi->index, count)]; + id __unsafe_unretained *objects = ALLOCA_ARRAY(id __unsafe_unretained, count); + for (size_t j = 0; j < count; j += 1) { + id obj = rid::get(uniform.ids[j]); + objects[j] = obj; } + NSRange texture_range = NSMakeRange(indexes.texture, count); + p_enc.set(objects, texture_range); } break; case RDD::UNIFORM_TYPE_IMAGE: { size_t count = uniform.ids.size(); - if (count == 1) { - id obj = rid::get(uniform.ids[0]); - [enc setTexture:obj atIndex:bi->index]; + id __unsafe_unretained *objects = ALLOCA_ARRAY(id __unsafe_unretained, count); + for (size_t j = 0; j < count; j += 1) { + id obj = rid::get(uniform.ids[j]); + objects[j] = obj; + } + NSRange texture_range = NSMakeRange(indexes.texture, count); + p_enc.set(objects, texture_range); - const BindingInfo *sbi = ui.bindings_secondary.getptr(stage); - if (sbi) { - id tex = obj.parentTexture ? obj.parentTexture : obj; - id buf = tex.buffer; - if (buf) { - [enc setBuffer:buf offset:tex.bufferOffset atIndex:sbi->index]; - } - } - } else { - id __unsafe_unretained *objects = ALLOCA_ARRAY(id __unsafe_unretained, count); + if (indexes.buffer != UINT32_MAX) { + // Emulated atomic image access. + id __unsafe_unretained *bufs = ALLOCA_ARRAY(id __unsafe_unretained, count); for (size_t j = 0; j < count; j += 1) { id obj = rid::get(uniform.ids[j]); - objects[j] = obj; + id tex = obj.parentTexture ? 
obj.parentTexture : obj; + id buf = tex.buffer; + bufs[j] = buf; } - [enc setTextures:objects withRange:NSMakeRange(bi->index, count)]; + NSUInteger *offs = ALLOCA_ARRAY(NSUInteger, count); + bzero(offs, sizeof(NSUInteger) * count); + NSRange buffer_range = NSMakeRange(indexes.buffer, count); + p_enc.set(bufs, offs, buffer_range); } } break; case RDD::UNIFORM_TYPE_TEXTURE_BUFFER: { @@ -1490,27 +1742,23 @@ } break; case RDD::UNIFORM_TYPE_UNIFORM_BUFFER: case RDD::UNIFORM_TYPE_STORAGE_BUFFER: { - const RenderingDeviceDriverMetal::BufferInfo *buf_info = (const RenderingDeviceDriverMetal::BufferInfo *)uniform.ids[0].id; - [enc setBuffer:buf_info->metal_buffer offset:0 atIndex:bi->index]; + const RDM::BufferInfo *buf_info = (const RDM::BufferInfo *)uniform.ids[0].id; + p_enc.set(buf_info->metal_buffer, 0, indexes.buffer); } break; case RDD::UNIFORM_TYPE_UNIFORM_BUFFER_DYNAMIC: case RDD::UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC: { const MetalBufferDynamicInfo *buf_info = (const MetalBufferDynamicInfo *)uniform.ids[0].id; - [enc setBuffer:buf_info->metal_buffer offset:frame_idx * buf_info->size_bytes atIndex:bi->index]; + p_enc.set(buf_info->metal_buffer, frame_idx * buf_info->size_bytes, indexes.buffer); } break; case RDD::UNIFORM_TYPE_INPUT_ATTACHMENT: { size_t count = uniform.ids.size(); - if (count == 1) { - id obj = rid::get(uniform.ids[0]); - [enc setTexture:obj atIndex:bi->index]; - } else { - id __unsafe_unretained *objects = ALLOCA_ARRAY(id __unsafe_unretained, count); - for (size_t j = 0; j < count; j += 1) { - id obj = rid::get(uniform.ids[j]); - objects[j] = obj; - } - [enc setTextures:objects withRange:NSMakeRange(bi->index, count)]; + id __unsafe_unretained *objects = ALLOCA_ARRAY(id __unsafe_unretained, count); + for (size_t j = 0; j < count; j += 1) { + id obj = rid::get(uniform.ids[j]); + objects[j] = obj; } + NSRange texture_range = NSMakeRange(indexes.texture, count); + p_enc.set(objects, texture_range); } break; default: { DEV_ASSERT(false); @@ -1519,256 +1767,16 @@ } } -void MDUniformSet::bind_uniforms(MDShader *p_shader, MDCommandBuffer::ComputeState &p_state, uint32_t p_set_index, uint32_t p_dynamic_offsets, uint32_t p_frame_idx, uint32_t p_frame_count) { - if (p_shader->uses_argument_buffers) { - bind_uniforms_argument_buffers(p_shader, p_state, p_set_index, p_dynamic_offsets, p_frame_idx, p_frame_count); - } else { - bind_uniforms_direct(p_shader, p_state, p_set_index, p_dynamic_offsets); - } -} - -BoundUniformSet &MDUniformSet::bound_uniform_set(MDShader *p_shader, id p_device, ResourceUsageMap &p_resource_usage, uint32_t p_set_index, uint32_t p_dynamic_offsets, uint32_t p_frame_idx, uint32_t p_frame_count) { - BoundUniformSet *sus = bound_uniforms.getptr(p_shader); - if (sus != nullptr) { - BoundUniformSet &bs = *sus; - if (bs.is_dynamic()) { - update_dynamic_uniforms(p_shader, p_resource_usage, p_set_index, bs, p_dynamic_offsets, p_frame_idx); - } - bs.merge_into(p_resource_usage); - return bs; - } - - UniformSet const &set = p_shader->sets[p_set_index]; - - HashMap, StageResourceUsage> bound_resources; - auto add_usage = [&bound_resources](id __unsafe_unretained res, RDD::ShaderStage stage, MTLResourceUsage usage) { - StageResourceUsage *sru = bound_resources.getptr(res); - if (sru == nullptr) { - bound_resources.insert(res, stage_resource_usage(stage, usage)); - } else { - *sru |= stage_resource_usage(stage, usage); - } - }; - id enc_buffer = nil; - uint32_t frame_size = set.buffer_size; - uint32_t buffer_size = frame_size; - if (!set.dynamic_uniforms.is_empty()) { - // 
We need to store a copy of the argument buffer for each frame that could be in flight, just - // like the dynamic buffers themselves. - buffer_size *= p_frame_count; - } else { - frame_size = 0; - } - if (set.buffer_size > 0) { - MTLResourceOptions options = MTLResourceHazardTrackingModeUntracked | MTLResourceStorageModeShared; - enc_buffer = [p_device newBufferWithLength:buffer_size options:options]; - for (KeyValue> const &kv : set.encoders) { - RDD::ShaderStage const stage = kv.key; - ShaderStageUsage const stage_usage = ShaderStageUsage(1 << stage); - id const enc = kv.value; - - [enc setArgumentBuffer:enc_buffer offset:set.offsets[stage]]; - - for (uint32_t i = 0; i < uniforms.size(); i++) { - RDD::BoundUniform const &uniform = uniforms[i]; - const UniformInfo &ui = set.uniforms[i]; - - const BindingInfo *bi = ui.bindings.getptr(stage); - if (bi == nullptr) { - // No binding for this stage. - continue; - } - - if ((ui.active_stages & stage_usage) == 0) { - // Not active for this state, so don't bind anything. - continue; - } - - switch (uniform.type) { - case RDD::UNIFORM_TYPE_SAMPLER: { - size_t count = uniform.ids.size(); - id __unsafe_unretained *objects = ALLOCA_ARRAY(id __unsafe_unretained, count); - for (size_t j = 0; j < count; j += 1) { - objects[j] = rid::get(uniform.ids[j].id); - } - [enc setSamplerStates:objects withRange:NSMakeRange(bi->index, count)]; - } break; - case RDD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE: { - size_t count = uniform.ids.size() / 2; - id __unsafe_unretained *textures = ALLOCA_ARRAY(id __unsafe_unretained, count); - id __unsafe_unretained *samplers = ALLOCA_ARRAY(id __unsafe_unretained, count); - for (uint32_t j = 0; j < count; j += 1) { - id sampler = rid::get(uniform.ids[j * 2 + 0]); - id texture = rid::get(uniform.ids[j * 2 + 1]); - samplers[j] = sampler; - textures[j] = texture; - add_usage(texture, stage, bi->usage); - } - const BindingInfo *sbi = ui.bindings_secondary.getptr(stage); - if (sbi) { - [enc setSamplerStates:samplers withRange:NSMakeRange(sbi->index, count)]; - } - [enc setTextures:textures - withRange:NSMakeRange(bi->index, count)]; - } break; - case RDD::UNIFORM_TYPE_TEXTURE: { - size_t count = uniform.ids.size(); - if (count == 1) { - id obj = rid::get(uniform.ids[0]); - [enc setTexture:obj atIndex:bi->index]; - add_usage(obj, stage, bi->usage); - } else { - id __unsafe_unretained *objects = ALLOCA_ARRAY(id __unsafe_unretained, count); - for (size_t j = 0; j < count; j += 1) { - id obj = rid::get(uniform.ids[j]); - objects[j] = obj; - add_usage(obj, stage, bi->usage); - } - [enc setTextures:objects withRange:NSMakeRange(bi->index, count)]; - } - } break; - case RDD::UNIFORM_TYPE_IMAGE: { - size_t count = uniform.ids.size(); - if (count == 1) { - id obj = rid::get(uniform.ids[0]); - [enc setTexture:obj atIndex:bi->index]; - add_usage(obj, stage, bi->usage); - const BindingInfo *sbi = ui.bindings_secondary.getptr(stage); - if (sbi) { - id tex = obj.parentTexture ? 
obj.parentTexture : obj; - id buf = tex.buffer; - if (buf) { - [enc setBuffer:buf offset:tex.bufferOffset atIndex:sbi->index]; - } - } - } else { - id __unsafe_unretained *objects = ALLOCA_ARRAY(id __unsafe_unretained, count); - for (size_t j = 0; j < count; j += 1) { - id obj = rid::get(uniform.ids[j]); - objects[j] = obj; - add_usage(obj, stage, bi->usage); - } - [enc setTextures:objects withRange:NSMakeRange(bi->index, count)]; - } - } break; - case RDD::UNIFORM_TYPE_TEXTURE_BUFFER: { - ERR_PRINT("not implemented: UNIFORM_TYPE_TEXTURE_BUFFER"); - } break; - case RDD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE_BUFFER: { - ERR_PRINT("not implemented: UNIFORM_TYPE_SAMPLER_WITH_TEXTURE_BUFFER"); - } break; - case RDD::UNIFORM_TYPE_IMAGE_BUFFER: { - CRASH_NOW_MSG("not implemented: UNIFORM_TYPE_IMAGE_BUFFER"); - } break; - case RDD::UNIFORM_TYPE_UNIFORM_BUFFER: - case RDD::UNIFORM_TYPE_STORAGE_BUFFER: { - const RenderingDeviceDriverMetal::BufferInfo *buf_info = (const RenderingDeviceDriverMetal::BufferInfo *)uniform.ids[0].id; - [enc setBuffer:buf_info->metal_buffer offset:0 atIndex:bi->index]; - add_usage(buf_info->metal_buffer, stage, bi->usage); - } break; - case RDD::UNIFORM_TYPE_UNIFORM_BUFFER_DYNAMIC: - case RDD::UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC: { - const MetalBufferDynamicInfo *buf_info = (const MetalBufferDynamicInfo *)uniform.ids[0].id; - add_usage(buf_info->metal_buffer, stage, bi->usage); - } break; - - case RDD::UNIFORM_TYPE_INPUT_ATTACHMENT: { - size_t count = uniform.ids.size(); - if (count == 1) { - id obj = rid::get(uniform.ids[0]); - [enc setTexture:obj atIndex:bi->index]; - add_usage(obj, stage, bi->usage); - } else { - id __unsafe_unretained *objects = ALLOCA_ARRAY(id __unsafe_unretained, count); - for (size_t j = 0; j < count; j += 1) { - id obj = rid::get(uniform.ids[j]); - objects[j] = obj; - add_usage(obj, stage, bi->usage); - } - [enc setTextures:objects withRange:NSMakeRange(bi->index, count)]; - } - } break; - default: { - DEV_ASSERT(false); - } - } - } - } - - // Duplicate the argument buffer data for each frame, if needed. - // The dynamic uniforms will be updated each frame. - if (frame_size > 0) { - void *ptr = enc_buffer.contents; - for (uint32_t i = 1; i < p_frame_count; i++) { - void *dst = (void *)((uintptr_t)ptr + i * frame_size); - memcpy(dst, ptr, frame_size); - } - } - } - - ResourceUsageMap usage_to_resources; - for (KeyValue, StageResourceUsage> const &keyval : bound_resources) { - ResourceVector *resources = usage_to_resources.getptr(keyval.value); - if (resources == nullptr) { - resources = &usage_to_resources.insert(keyval.value, ResourceVector())->value; - } - int64_t pos = resources->span().bisect(keyval.key, true); - if (pos == resources->size() || (*resources)[pos] != keyval.key) { - resources->insert(pos, keyval.key); - } - } - - BoundUniformSet &bs = bound_uniforms.insert(p_shader, BoundUniformSet(enc_buffer, std::move(usage_to_resources), frame_size))->value; - if (bs.is_dynamic()) { - update_dynamic_uniforms(p_shader, p_resource_usage, p_set_index, bs, p_dynamic_offsets, p_frame_idx); - } - bs.merge_into(p_resource_usage); - return bs; -} - -void MDUniformSet::update_dynamic_uniforms(MDShader *p_shader, ResourceUsageMap &p_resource_usage, uint32_t p_set_index, BoundUniformSet &p_bound_set, uint32_t p_dynamic_offsets, uint32_t p_frame_idx) { - // This shouldn't be called if the set doesn't have dynamic uniforms. 
- DEV_ASSERT(p_bound_set.is_dynamic()); - - UniformSet const &set = p_shader->sets[p_set_index]; - DEV_ASSERT(!set.dynamic_uniforms.is_empty()); // Programming error if this is empty. - - DynamicOffsetLayout layout = p_shader->dynamic_offset_layout; - - for (KeyValue> const &kv : set.encoders) { - RDD::ShaderStage const stage = kv.key; - ShaderStageUsage const stage_usage = ShaderStageUsage(1 << stage); - id const __unsafe_unretained enc = kv.value; - - [enc setArgumentBuffer:p_bound_set.buffer offset:p_bound_set.make_offset(p_frame_idx, set.offsets[stage])]; - - uint32_t dynamic_index = 0; - - for (uint32_t i : set.dynamic_uniforms) { - RDD::BoundUniform const &uniform = uniforms[i]; - const UniformInfo &ui = set.uniforms[i]; - - const BindingInfo *bi = ui.bindings.getptr(stage); - if (bi == nullptr) { - // No binding for this stage. - continue; - } - - if ((ui.active_stages & stage_usage) == None) { - // Not active for this state, so don't bind anything. - continue; - } +void MDUniformSet::bind_uniforms_argument_buffers(MDShader *p_shader, MDCommandBuffer::ComputeState &p_state, uint32_t p_set_index, uint32_t p_dynamic_offsets, uint32_t p_frame_idx, uint32_t p_frame_count) { + DEV_ASSERT(p_shader->uses_argument_buffers); + DEV_ASSERT(p_state.encoder != nil); - uint32_t shift = layout.get_offset_index_shift(p_set_index, dynamic_index); - dynamic_index++; - uint32_t frame_idx = (p_dynamic_offsets >> shift) & 0xf; + id enc = p_state.encoder; - const MetalBufferDynamicInfo *buf_info = (const MetalBufferDynamicInfo *)uniform.ids[0].id; - [enc setBuffer:buf_info->metal_buffer - offset:frame_idx * buf_info->size_bytes - atIndex:bi->index]; - } - } + p_state.resource_tracker.merge_from(usage_to_resources); + [enc setBuffer:arg_buffer + offset:0 + atIndex:p_set_index]; } MTLFmtCaps MDSubpass::getRequiredFmtCapsForAttachmentAt(uint32_t p_index) const { diff --git a/drivers/metal/metal_objects_shared.h b/drivers/metal/metal_objects_shared.h new file mode 100644 index 000000000000..d74072c95a0f --- /dev/null +++ b/drivers/metal/metal_objects_shared.h @@ -0,0 +1,196 @@ +/**************************************************************************/ +/* metal_objects_shared.h */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +#pragma once + +#import "metal_device_properties.h" +#import "metal_utils.h" + +using RDC = RenderingDeviceCommons; + +// These types can be used in Vector and other containers that use +// pointer operations not supported by ARC. +namespace MTL { +#define MTL_CLASS(name) \ + class name { \ + public: \ + name(id obj = nil) : m_obj(obj) {} \ + operator id() const { \ + return m_obj; \ + } \ + id m_obj; \ + }; + +MTL_CLASS(Texture) + +} //namespace MTL + +enum ShaderStageUsage : uint32_t { + None = 0, + Vertex = RDD::SHADER_STAGE_VERTEX_BIT, + Fragment = RDD::SHADER_STAGE_FRAGMENT_BIT, + TesselationControl = RDD::SHADER_STAGE_TESSELATION_CONTROL_BIT, + TesselationEvaluation = RDD::SHADER_STAGE_TESSELATION_EVALUATION_BIT, + Compute = RDD::SHADER_STAGE_COMPUTE_BIT, +}; + +_FORCE_INLINE_ ShaderStageUsage &operator|=(ShaderStageUsage &p_a, int p_b) { + p_a = ShaderStageUsage(uint32_t(p_a) | uint32_t(p_b)); + return p_a; +} + +struct ClearAttKey { + const static uint32_t COLOR_COUNT = MAX_COLOR_ATTACHMENT_COUNT; + const static uint32_t DEPTH_INDEX = COLOR_COUNT; + const static uint32_t STENCIL_INDEX = DEPTH_INDEX + 1; + const static uint32_t ATTACHMENT_COUNT = STENCIL_INDEX + 1; + + enum Flags : uint16_t { + CLEAR_FLAGS_NONE = 0, + CLEAR_FLAGS_LAYERED = 1 << 0, + }; + + Flags flags = CLEAR_FLAGS_NONE; + uint16_t sample_count = 0; + uint16_t pixel_formats[ATTACHMENT_COUNT] = { 0 }; + + _FORCE_INLINE_ void set_color_format(uint32_t p_idx, MTLPixelFormat p_fmt) { pixel_formats[p_idx] = p_fmt; } + _FORCE_INLINE_ void set_depth_format(MTLPixelFormat p_fmt) { pixel_formats[DEPTH_INDEX] = p_fmt; } + _FORCE_INLINE_ void set_stencil_format(MTLPixelFormat p_fmt) { pixel_formats[STENCIL_INDEX] = p_fmt; } + _FORCE_INLINE_ MTLPixelFormat depth_format() const { return (MTLPixelFormat)pixel_formats[DEPTH_INDEX]; } + _FORCE_INLINE_ MTLPixelFormat stencil_format() const { return (MTLPixelFormat)pixel_formats[STENCIL_INDEX]; } + _FORCE_INLINE_ void enable_layered_rendering() { flags::set(flags, CLEAR_FLAGS_LAYERED); } + + _FORCE_INLINE_ bool is_enabled(uint32_t p_idx) const { return pixel_formats[p_idx] != 0; } + _FORCE_INLINE_ bool is_depth_enabled() const { return pixel_formats[DEPTH_INDEX] != 0; } + _FORCE_INLINE_ bool is_stencil_enabled() const { return pixel_formats[STENCIL_INDEX] != 0; } + _FORCE_INLINE_ bool is_layered_rendering_enabled() const { return flags::any(flags, CLEAR_FLAGS_LAYERED); } + + _FORCE_INLINE_ bool operator==(const ClearAttKey &p_rhs) const { + return memcmp(this, &p_rhs, sizeof(ClearAttKey)) == 0; + } + + uint32_t hash() const { + uint32_t h = hash_murmur3_one_32(flags); + h = hash_murmur3_one_32(sample_count, h); + h = hash_murmur3_buffer(pixel_formats, ATTACHMENT_COUNT * sizeof(pixel_formats[0]), h); + return hash_fmix32(h); + } +}; + +/** + * Returns an index that can be used to map a shader stage to an index in a fixed-size array that is used for + * a single pipeline type. 
+ */ +_FORCE_INLINE_ static uint32_t to_index(RDD::ShaderStage p_s) { + switch (p_s) { + case RenderingDeviceCommons::SHADER_STAGE_VERTEX: + case RenderingDeviceCommons::SHADER_STAGE_TESSELATION_CONTROL: + case RenderingDeviceCommons::SHADER_STAGE_TESSELATION_EVALUATION: + case RenderingDeviceCommons::SHADER_STAGE_COMPUTE: + default: + return 0; + case RenderingDeviceCommons::SHADER_STAGE_FRAGMENT: + return 1; + } +} + +class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDFrameBuffer { + Vector textures; + +public: + Size2i size; + MDFrameBuffer(Vector p_textures, Size2i p_size) : + textures(p_textures), size(p_size) {} + MDFrameBuffer() {} + + /// Returns the texture at the given index. + _ALWAYS_INLINE_ MTL::Texture get_texture(uint32_t p_idx) const { + return textures[p_idx]; + } + + /// Returns true if the texture at the given index is not nil. + _ALWAYS_INLINE_ bool has_texture(uint32_t p_idx) const { + return textures[p_idx] != nil; + } + + /// Sets the texture at the given index. + _ALWAYS_INLINE_ void set_texture(uint32_t p_idx, MTL::Texture p_texture) { + textures.write[p_idx] = p_texture; + } + + /// Unsets (nils) the texture at the given index. + _ALWAYS_INLINE_ void unset_texture(uint32_t p_idx) { + textures.write[p_idx] = nil; + } + + /// Resizes the texture array to the specified count. + _ALWAYS_INLINE_ void set_texture_count(uint32_t p_size) { + textures.resize(p_size); + } + + virtual ~MDFrameBuffer() = default; +}; + +// These functions are used to convert between Objective-C objects and +// the RIDs used by Godot, respecting automatic reference counting. +namespace rid { + +// Converts an Objective-C object to a pointer, and increments the +// reference count. +_FORCE_INLINE_ void *owned(id p_id) { + return (__bridge_retained void *)p_id; +} + +#define MAKE_ID(FROM, TO) \ + _FORCE_INLINE_ TO make(FROM p_obj) { \ + return TO(owned(p_obj)); \ + } + +// These are shared by the Metal and Metal 4 drivers. + +MAKE_ID(id, RDD::TextureID) +MAKE_ID(id, RDD::BufferID) +MAKE_ID(id, RDD::SamplerID) +MAKE_ID(MTLVertexDescriptor *, RDD::VertexFormatID) + +#undef MAKE_ID + +// Converts a pointer to an Objective-C object without changing the reference count. +_FORCE_INLINE_ auto get(RDD::ID p_id) { + return (p_id.id) ? (__bridge ::id)(void *)p_id.id : nil; +} + +// Converts a pointer to an Objective-C object, and decrements the reference count. +_FORCE_INLINE_ auto release(RDD::ID p_id) { + return (__bridge_transfer ::id)(void *)p_id.id; +} + +} // namespace rid diff --git a/drivers/metal/metal_utils.h b/drivers/metal/metal_utils.h index bcb1f8c73062..54c3509984df 100644 --- a/drivers/metal/metal_utils.h +++ b/drivers/metal/metal_utils.h @@ -34,6 +34,14 @@ #import +/// Godot limits the number of dynamic buffers to 8. +/// +/// This is the minimum guaranteed by Vulkan. +constexpr uint32_t MAX_DYNAMIC_BUFFERS = 8; + +// From rendering/rendering_device/vsync/frame_queue_size +static constexpr uint32_t MAX_FRAME_COUNT = 3; + #pragma mark - Boolean flags namespace flags { @@ -103,11 +111,37 @@ extern os_log_t LOG_DRIVER; // Used for dynamic tracing.
extern os_log_t LOG_INTERVALS; -_FORCE_INLINE_ static uint32_t make_msl_version(uint32_t p_major, uint32_t p_minor = 0, uint32_t p_patch = 0) { +_FORCE_INLINE_ static constexpr uint32_t make_msl_version(uint32_t p_major, uint32_t p_minor = 0, uint32_t p_patch = 0) { return (p_major * 10000) + (p_minor * 100) + p_patch; } -_FORCE_INLINE_ static void parse_msl_version(uint32_t p_version, uint32_t &r_major, uint32_t &r_minor) { +_FORCE_INLINE_ static constexpr void parse_msl_version(uint32_t p_version, uint32_t &r_major, uint32_t &r_minor) { r_major = p_version / 10000; r_minor = (p_version % 10000) / 100; } + +constexpr uint32_t MSL_VERSION_23 = make_msl_version(2, 3); +constexpr uint32_t MSL_VERSION_24 = make_msl_version(2, 4); +constexpr uint32_t MSL_VERSION_30 = make_msl_version(3, 0); +constexpr uint32_t MSL_VERSION_31 = make_msl_version(3, 1); +constexpr uint32_t MSL_VERSION_32 = make_msl_version(3, 2); +constexpr uint32_t MSL_VERSION_40 = make_msl_version(4, 0); + +/* MSL Language version table + * + * | Version | macOS | iOS | + * |---------|---------|---------| + * | 1.0 | | 9.0 | + * | 1.1 | 10.11 | 9.0 | + * | 1.2 | 10.12 | 10.0 | + * | 2.0 | 10.13 | 11.0 | + * | 2.1 | 10.14 | 12.0 | + * | 2.2 | 10.15 | 13.0 | + * | 2.3 | 11.0 | 14.0 | + * | 2.4 | 12.0 | 15.0 | + * | 3.0 | 13.0 | 16.0 | + * | 3.1 | 14.0 | 17.0 | + * | 3.2 | 15.0 | 18.0 | + * | 4.0 | 26.0 | 26.0 | + * |---------|---------|---------| + */ diff --git a/drivers/metal/pixel_formats.mm b/drivers/metal/pixel_formats.mm index 9ce9cdda733a..ba15ca026ea5 100644 --- a/drivers/metal/pixel_formats.mm +++ b/drivers/metal/pixel_formats.mm @@ -117,6 +117,8 @@ void clear(T *p_val, size_t p_count = 1) { #if defined(VISIONOS_ENABLED) return false; #else + // Deprecated in SDK 26.0 + GODOT_CLANG_WARNING_PUSH_AND_IGNORE("-Wdeprecated-declarations") switch (p_format) { case MTLPixelFormatPVRTC_RGBA_2BPP: case MTLPixelFormatPVRTC_RGBA_2BPP_sRGB: @@ -130,6 +132,7 @@ void clear(T *p_val, size_t p_count = 1) { default: return false; } + GODOT_CLANG_WARNING_POP #endif } @@ -673,11 +676,13 @@ void clear(T *p_val, size_t p_count = 1) { addMTLPixelFormatDesc(RGBA32Float, Color128, All); #if !defined(VISIONOS_ENABLED) + GODOT_CLANG_WARNING_PUSH_AND_IGNORE("-Wdeprecated-declarations") // Compressed pixel formats addMTLPixelFormatDesc(PVRTC_RGBA_2BPP, PVRTC_RGBA_2BPP, RF); addMTLPixelFormatDescSRGB(PVRTC_RGBA_2BPP_sRGB, PVRTC_RGBA_2BPP, RF, PVRTC_RGBA_2BPP); addMTLPixelFormatDesc(PVRTC_RGBA_4BPP, PVRTC_RGBA_4BPP, RF); addMTLPixelFormatDescSRGB(PVRTC_RGBA_4BPP_sRGB, PVRTC_RGBA_4BPP, RF, PVRTC_RGBA_4BPP); + GODOT_CLANG_WARNING_POP #endif addMTLPixelFormatDesc(ETC2_RGB8, ETC2_RGB8, RF); diff --git a/drivers/metal/rendering_context_driver_metal.h b/drivers/metal/rendering_context_driver_metal.h index d2dd4958a991..8e313d81f188 100644 --- a/drivers/metal/rendering_context_driver_metal.h +++ b/drivers/metal/rendering_context_driver_metal.h @@ -53,17 +53,24 @@ class MDCommandBuffer; #endif class PixelFormats; -class MDResourceCache; + +#ifdef __OBJC__ +#define METAL_DEVICE id +#define METAL_DRAWABLE id +#define METAL_LAYER CAMetalLayer *__unsafe_unretained +#define METAL_RESIDENCY_SET id +#else +#define METAL_DEVICE void * +#define METAL_DRAWABLE void * +#define METAL_LAYER void * +#define METAL_RESIDENCY_SET void * +#endif class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) RenderingContextDriverMetal : public RenderingContextDriver { bool capture_available = false; protected: -#ifdef __OBJC__ - id metal_device = nullptr; -#else - void *metal_device 
= nullptr; -#endif + METAL_DEVICE metal_device = nullptr; Device device; // There is only one device on Apple Silicon. public: @@ -88,20 +95,12 @@ class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) RenderingContextDriverMe // Platform-specific data for the Windows embedded in this driver. struct WindowPlatformData { -#ifdef __OBJC__ - CAMetalLayer *__unsafe_unretained layer; -#else - void *layer; -#endif + METAL_LAYER layer; }; class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) Surface { protected: -#ifdef __OBJC__ - id device; -#else - void *device; -#endif + METAL_DEVICE device; public: uint32_t width = 0; @@ -110,15 +109,8 @@ class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) RenderingContextDriverMe bool needs_resize = false; double present_minimum_duration = 0.0; - Surface( -#ifdef __OBJC__ - id p_device -#else - void *p_device -#endif - ) : - device(p_device) { - } + Surface(METAL_DEVICE p_device) : + device(p_device) {} virtual ~Surface() = default; MTLPixelFormat get_pixel_format() const { return MTLPixelFormatBGRA8Unorm; } @@ -128,12 +120,7 @@ class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) RenderingContextDriverMe void set_max_fps(int p_max_fps) { present_minimum_duration = p_max_fps ? 1.0 / p_max_fps : 0.0; } }; -#ifdef __OBJC__ - id -#else - void * -#endif - get_metal_device() const { + METAL_DEVICE get_metal_device() const { return metal_device; } diff --git a/drivers/metal/rendering_context_driver_metal.mm b/drivers/metal/rendering_context_driver_metal.mm index 6871b97385fb..51b752f2a041 100644 --- a/drivers/metal/rendering_context_driver_metal.mm +++ b/drivers/metal/rendering_context_driver_metal.mm @@ -32,6 +32,22 @@ #import "rendering_device_driver_metal.h" +#include "core/templates/sort_array.h" + +#import +#import + +#pragma mark - Logging + +os_log_t LOG_DRIVER; +// Used for dynamic tracing. +os_log_t LOG_INTERVALS; + +__attribute__((constructor)) static void InitializeLogging(void) { + LOG_DRIVER = os_log_create("org.godotengine.godot.metal", OS_LOG_CATEGORY_POINTS_OF_INTEREST); + LOG_INTERVALS = os_log_create("org.godotengine.godot.metal", "events"); +} + @protocol MTLDeviceEx #if TARGET_OS_OSX && __MAC_OS_X_VERSION_MAX_ALLOWED < 130300 - (void)setShouldMaximizeConcurrentCompilation:(BOOL)v; @@ -184,9 +200,115 @@ void present(MDCommandBuffer *p_cmd_buffer) override final { } }; +class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) SurfaceOffscreen : public RenderingContextDriverMetal::Surface { + int frame_buffer_size = 3; + MDFrameBuffer *frame_buffers; + LocalVector> textures; + LocalVector> drawables; + + int32_t rear = -1; + std::atomic_int count; + uint64_t target_time = 0; + CAMetalLayer *layer; + +public: + SurfaceOffscreen(CAMetalLayer *p_layer, id p_device) : + Surface(p_device), layer(p_layer) { + layer.allowsNextDrawableTimeout = YES; + layer.framebufferOnly = YES; + layer.opaque = OS::get_singleton()->is_layered_allowed() ? 
NO : YES; + layer.pixelFormat = get_pixel_format(); + layer.device = p_device; +#if TARGET_OS_OSX + layer.displaySyncEnabled = NO; +#endif + target_time = OS::get_singleton()->get_ticks_usec(); + + textures.resize(frame_buffer_size); + drawables.resize(frame_buffer_size); + + frame_buffers = memnew_arr(MDFrameBuffer, frame_buffer_size); + for (int i = 0; i < frame_buffer_size; i++) { + frame_buffers[i].set_texture_count(1); + } + } + + ~SurfaceOffscreen() override { + memdelete_arr(frame_buffers); + } + + Error resize(uint32_t p_desired_framebuffer_count) override final { + if (width == 0 || height == 0) { + // Very likely the window is minimized, don't create a swap chain. + return ERR_SKIP; + } + + CGSize drawableSize = CGSizeMake(width, height); + CGSize current = layer.drawableSize; + if (!CGSizeEqualToSize(current, drawableSize)) { + layer.drawableSize = drawableSize; + } + + return OK; + } + + RDD::FramebufferID acquire_next_frame_buffer() override final { + if (count.load(std::memory_order_relaxed) == 3) { + // Wait for a frame to be presented. + return RDD::FramebufferID(); + } + + rear = (rear + 1) % 3; + count.fetch_add(1, std::memory_order_relaxed); + + MDFrameBuffer &frame_buffer = frame_buffers[rear]; + + if (textures[rear] == nil || textures[rear].width != width || textures[rear].height != height) { + MTLTextureDescriptor *texture_descriptor = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:get_pixel_format() width:width height:height mipmapped:NO]; + texture_descriptor.usage = MTLTextureUsageRenderTarget; + texture_descriptor.hazardTrackingMode = MTLHazardTrackingModeUntracked; + texture_descriptor.storageMode = MTLStorageModePrivate; + textures[rear] = [device newTextureWithDescriptor:texture_descriptor]; + } + + frame_buffer.size = Size2i(width, height); + uint64_t now = OS::get_singleton()->get_ticks_usec(); + if (now >= target_time) { + target_time = now + 1'000'000; // 1 second into the future. 
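The acquire path above is a small single-producer frame ring. A condensed sketch of the same bookkeeping, not patch code; it assumes the fixed depth of 3 used here, and relaxed atomics suffice because only the scheduled handler ever decrements:

#include <atomic>
#include <cstdint>

// Sketch: the frame ring used by SurfaceOffscreen.
struct FrameRing {
    static constexpr int DEPTH = 3; // Matches frame_buffer_size above.
    int32_t rear = -1; // Most recently acquired slot.
    std::atomic_int in_flight{ 0 }; // Acquired but not yet presented.

    // Returns the next slot, or -1 when all slots are still in flight
    // (the driver then returns a null FramebufferID).
    int acquire() {
        if (in_flight.load(std::memory_order_relaxed) == DEPTH) {
            return -1;
        }
        rear = (rear + 1) % DEPTH;
        in_flight.fetch_add(1, std::memory_order_relaxed);
        return rear;
    }

    // Invoked from the command buffer's scheduled handler in present().
    void release() {
        in_flight.fetch_add(-1, std::memory_order_relaxed);
    }
};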
+ id drawable = layer.nextDrawable; + ERR_FAIL_NULL_V_MSG(drawable, RDD::FramebufferID(), "no drawable available"); + drawables[rear] = drawable; + frame_buffer.set_texture(0, drawable.texture); + } else { + frame_buffer.set_texture(0, textures[rear]); + } + + return RDD::FramebufferID(&frame_buffers[rear]); + } + + void present(MDCommandBuffer *p_cmd_buffer) override final { + MDFrameBuffer *frame_buffer = &frame_buffers[rear]; + + if (drawables[rear] != nil) { + [p_cmd_buffer->get_command_buffer() presentDrawable:drawables[rear]]; + drawables[rear] = nil; + } + + [p_cmd_buffer->get_command_buffer() addScheduledHandler:^(id p_command_buffer) { + frame_buffer->unset_texture(0); + count.fetch_add(-1, std::memory_order_relaxed); + }]; + } +}; + RenderingContextDriver::SurfaceID RenderingContextDriverMetal::surface_create(const void *p_platform_data) { const WindowPlatformData *wpd = (const WindowPlatformData *)(p_platform_data); - Surface *surface = memnew(SurfaceLayer(wpd->layer, metal_device)); + Surface *surface; + if (String v = OS::get_singleton()->get_environment("GODOT_MTL_OFF_SCREEN"); v == U"1") { + surface = memnew(SurfaceOffscreen(wpd->layer, metal_device)); + } else { + surface = memnew(SurfaceLayer(wpd->layer, metal_device)); + } return SurfaceID(surface); } diff --git a/drivers/metal/rendering_device_driver_metal.h b/drivers/metal/rendering_device_driver_metal.h index 0012ac22f06e..b1b6b64030dc 100644 --- a/drivers/metal/rendering_device_driver_metal.h +++ b/drivers/metal/rendering_device_driver_metal.h @@ -30,14 +30,16 @@ #pragma once +#import "metal_device_profile.h" #import "metal_objects.h" -#import "rendering_shader_container_metal.h" #include "servers/rendering/rendering_device_driver.h" #import #import +class RenderingShaderContainerFormatMetal; + #ifdef DEBUG_ENABLED #ifndef _DEBUG #define _DEBUG @@ -59,11 +61,11 @@ class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) RenderingDeviceDriverMet RenderingContextDriver::Device context_device; id device = nil; - uint32_t frame_count = 1; + uint32_t _frame_count = 1; /// frame_index is a cyclic counter derived from the current frame number modulo frame_count, /// cycling through values from 0 to frame_count - 1 - uint32_t frame_index = 0; - uint32_t frames_drawn = 0; + uint32_t _frame_index = 0; + uint32_t _frames_drawn = 0; MetalDeviceProperties *device_properties = nullptr; MetalDeviceProfile device_profile; @@ -179,9 +181,64 @@ class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) RenderingDeviceDriverMet private: struct Fence { + virtual void signal(id p_cmd_buffer) = 0; + virtual Error wait(uint32_t p_timeout_ms) = 0; + virtual ~Fence() = default; + }; + + struct FenceEvent : public Fence { + id event; + uint64_t value; + FenceEvent(id p_event) : + event(p_event), + value(0) {} + + virtual void signal(id p_cb) override { + if (p_cb) { + value++; + [p_cb encodeSignalEvent:event value:value]; + } + } + + virtual Error wait(uint32_t p_timeout_ms) override { + GODOT_CLANG_WARNING_PUSH + GODOT_CLANG_WARNING_PUSH_AND_IGNORE("-Wunguarded-availability") + BOOL signaled = [event waitUntilSignaledValue:value timeoutMS:p_timeout_ms]; + GODOT_CLANG_WARNING_POP + if (!signaled) { +#ifdef DEBUG_ENABLED + ERR_PRINT("timeout waiting for fence"); +#endif + return ERR_TIMEOUT; + } + + return OK; + } + }; + + struct FenceSemaphore : public Fence { dispatch_semaphore_t semaphore; - Fence() : + FenceSemaphore() : semaphore(dispatch_semaphore_create(0)) {} + + virtual void signal(id p_cb) override { + if (p_cb) { + [p_cb 
addCompletedHandler:^(id buffer) { + dispatch_semaphore_signal(semaphore); + }]; + } else { + dispatch_semaphore_signal(semaphore); + } + } + + virtual Error wait(uint32_t p_timeout_ms) override { + dispatch_time_t timeout = dispatch_time(DISPATCH_TIME_NOW, static_cast(p_timeout_ms) * 1000000); + long result = dispatch_semaphore_wait(semaphore, timeout); + if (result != 0) { + return ERR_TIMEOUT; + } + return OK; + } }; public: @@ -283,17 +340,6 @@ class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) RenderingDeviceDriverMet #pragma mark Transfer -private: - enum class CopySource { - Buffer, - Texture, - }; - void _copy_texture_buffer(CommandBufferID p_cmd_buffer, - CopySource p_source, - TextureID p_texture, - BufferID p_buffer, - VectorView p_regions); - public: virtual void command_clear_buffer(CommandBufferID p_cmd_buffer, BufferID p_buffer, uint64_t p_offset, uint64_t p_size) override final; virtual void command_copy_buffer(CommandBufferID p_cmd_buffer, BufferID p_src_buffer, BufferID p_dst_buffer, VectorView p_regions) override final; @@ -455,6 +501,10 @@ class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) RenderingDeviceDriverMet size_t get_texel_buffer_alignment_for_format(RDD::DataFormat p_format) const; size_t get_texel_buffer_alignment_for_format(MTLPixelFormat p_format) const; + _FORCE_INLINE_ uint32_t frame_count() const { return _frame_count; } + _FORCE_INLINE_ uint32_t frame_index() const { return _frame_index; } + _FORCE_INLINE_ uint32_t frames_drawn() const { return _frames_drawn; } + /******************/ RenderingDeviceDriverMetal(RenderingContextDriverMetal *p_context_driver); ~RenderingDeviceDriverMetal(); diff --git a/drivers/metal/rendering_device_driver_metal.mm b/drivers/metal/rendering_device_driver_metal.mm index 105d7f8eac80..32b797533736 100644 --- a/drivers/metal/rendering_device_driver_metal.mm +++ b/drivers/metal/rendering_device_driver_metal.mm @@ -54,7 +54,6 @@ #import "rendering_context_driver_metal.h" #import "rendering_shader_container_metal.h" -#include "core/io/compression.h" #include "core/io/marshalls.h" #include "core/string/ustring.h" #include "core/templates/hash_map.h" @@ -68,14 +67,9 @@ #pragma mark - Logging -os_log_t LOG_DRIVER; +extern os_log_t LOG_DRIVER; // Used for dynamic tracing. 
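Both Fence implementations above satisfy one contract: signal() is attached to a command buffer at submit time, and wait() blocks the CPU with a millisecond timeout (the dispatch fallback converts it to nanoseconds for dispatch_time). A minimal sketch of the MTLSharedEvent variant's monotonic-counter protocol, with the availability guards elided and names following the patch:

// Sketch: each signal() targets a fresh value, so a wait can never be
// satisfied by a stale signal from an earlier submission.
uint64_t value = 0;
id<MTLSharedEvent> event = [device newSharedEvent];

// Submit side (FenceEvent::signal):
value++;
[command_buffer encodeSignalEvent:event value:value];

// CPU side (FenceEvent::wait):
BOOL signaled = [event waitUntilSignaledValue:value timeoutMS:timeout_ms];
if (!signaled) {
    // Corresponds to returning ERR_TIMEOUT above.
}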
-os_log_t LOG_INTERVALS; - -__attribute__((constructor)) static void InitializeLogging(void) { - LOG_DRIVER = os_log_create("org.godotengine.godot.metal", OS_LOG_CATEGORY_POINTS_OF_INTEREST); - LOG_INTERVALS = os_log_create("org.godotengine.godot.metal", "events"); -} +extern os_log_t LOG_INTERVALS; /*****************/ /**** GENERIC ****/ @@ -91,14 +85,6 @@ static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_GREATER_OR_EQUAL, MTLCompareFunctionGreaterEqual)); static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_ALWAYS, MTLCompareFunctionAlways)); -_FORCE_INLINE_ MTLSize mipmapLevelSizeFromTexture(id p_tex, NSUInteger p_level) { - MTLSize lvlSize; - lvlSize.width = MAX(p_tex.width >> p_level, 1UL); - lvlSize.height = MAX(p_tex.height >> p_level, 1UL); - lvlSize.depth = MAX(p_tex.depth >> p_level, 1UL); - return lvlSize; -} - _FORCE_INLINE_ MTLSize mipmapLevelSizeFromSize(MTLSize p_size, NSUInteger p_level) { if (p_level == 0) { return p_size; @@ -111,10 +97,6 @@ _FORCE_INLINE_ MTLSize mipmapLevelSizeFromSize(MTLSize p_size, NSUInteger p_leve return lvlSize; } -_FORCE_INLINE_ static bool operator==(MTLSize p_a, MTLSize p_b) { - return p_a.width == p_b.width && p_a.height == p_b.height && p_a.depth == p_b.depth; -} - /*****************/ /**** BUFFERS ****/ /*****************/ @@ -122,7 +104,7 @@ _FORCE_INLINE_ MTLSize mipmapLevelSizeFromSize(MTLSize p_size, NSUInteger p_leve RDD::BufferID RenderingDeviceDriverMetal::buffer_create(uint64_t p_size, BitField p_usage, MemoryAllocationType p_allocation_type, uint64_t p_frames_drawn) { const uint64_t original_size = p_size; if (p_usage.has_flag(BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT)) { - p_size = round_up_to_alignment(p_size, 16u) * frame_count; + p_size = round_up_to_alignment(p_size, 16u) * _frame_count; } MTLResourceOptions options = 0; @@ -197,7 +179,7 @@ _FORCE_INLINE_ MTLSize mipmapLevelSizeFromSize(MTLSize p_size, NSUInteger p_leve ERR_FAIL_COND_V_MSG(buf_info->last_frame_mapped == p_frames_drawn, nullptr, "Buffers with BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT must only be mapped once per frame. Otherwise there could be race conditions with the GPU. 
Amalgamate all data uploading into one map(), use an extra buffer or remove the bit."); buf_info->last_frame_mapped = p_frames_drawn; #endif - return (uint8_t *)buf_info->metal_buffer.contents + buf_info->next_frame_index(frame_count) * buf_info->size_bytes; + return (uint8_t *)buf_info->metal_buffer.contents + buf_info->next_frame_index(_frame_count) * buf_info->size_bytes; } void RenderingDeviceDriverMetal::buffer_flush(BufferID p_buffer) { @@ -328,7 +310,9 @@ _FORCE_INLINE_ MTLSize mipmapLevelSizeFromSize(MTLSize p_size, NSUInteger p_leve #if defined(VISIONOS_ENABLED) const bool supports_memoryless = true; #else + GODOT_CLANG_WARNING_PUSH_AND_IGNORE("-Wdeprecated-declarations") const bool supports_memoryless = (*device_properties).features.highestFamily >= MTLGPUFamilyApple2 && (*device_properties).features.highestFamily < MTLGPUFamilyMac1; + GODOT_CLANG_WARNING_POP #endif if (supports_memoryless && p_format.usage_bits & TEXTURE_USAGE_TRANSIENT_BIT) { options = MTLResourceStorageModeMemoryless | MTLResourceHazardTrackingModeTracked; @@ -552,12 +536,12 @@ _FORCE_INLINE_ MTLSize mipmapLevelSizeFromSize(MTLSize p_size, NSUInteger p_leve } uint64_t RenderingDeviceDriverMetal::texture_get_allocation_size(TextureID p_texture) { - id obj = rid::get(p_texture); + id __unsafe_unretained obj = rid::get(p_texture); return obj.allocatedSize; } void RenderingDeviceDriverMetal::texture_get_copyable_layout(TextureID p_texture, const TextureSubresource &p_subresource, TextureCopyableLayout *r_layout) { - id obj = rid::get(p_texture); + id __unsafe_unretained obj = rid::get(p_texture); *r_layout = {}; PixelFormats &pf = *pixel_formats; @@ -791,9 +775,13 @@ static const API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MTLSamplerBorderC desc.normalizedCoordinates = !p_state.unnormalized_uvw; +#if __MAC_OS_X_VERSION_MAX_ALLOWED >= 260000 || __IPHONE_OS_VERSION_MAX_ALLOWED >= 260000 || __TV_OS_VERSION_MAX_ALLOWED >= 260000 || __VISION_OS_VERSION_MAX_ALLOWED >= 260000 if (p_state.lod_bias != 0.0) { - WARN_PRINT_ONCE("Metal does not support LOD bias for samplers."); + if (@available(macOS 26.0, iOS 26.0, tvOS 26.0, visionOS 26.0, *)) { + desc.lodBias = p_state.lod_bias; + } } +#endif id obj = [device newSamplerStateWithDescriptor:desc]; ERR_FAIL_NULL_V_MSG(obj, SamplerID(), "newSamplerStateWithDescriptor failed"); @@ -863,17 +851,18 @@ static const API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MTLSamplerBorderC #pragma mark - Fences RDD::FenceID RenderingDeviceDriverMetal::fence_create() { - Fence *fence = memnew(Fence); + Fence *fence = nullptr; + if (@available(macOS 10.14, iOS 12.0, tvOS 12.0, visionOS 1.0, *)) { + fence = memnew(FenceEvent([device newSharedEvent])); + } else { + fence = memnew(FenceSemaphore()); + } return FenceID(fence); } Error RenderingDeviceDriverMetal::fence_wait(FenceID p_fence) { Fence *fence = (Fence *)(p_fence.id); - - // Wait forever, so this function is infallible. - dispatch_semaphore_wait(fence->semaphore, DISPATCH_TIME_FOREVER); - - return OK; + return fence->wait(1000); } void RenderingDeviceDriverMetal::fence_free(FenceID p_fence) { @@ -924,15 +913,9 @@ static const API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MTLSamplerBorderC MDCommandBuffer *cmd_buffer = (MDCommandBuffer *)(p_cmd_buffers[size - 1].id); Fence *fence = (Fence *)(p_cmd_fence.id); if (fence != nullptr) { + cmd_buffer->end(); id cb = cmd_buffer->get_command_buffer(); - if (cb == nil) { - // If there is nothing to do, signal the fence immediately. 
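One subtlety in the buffer hunks above is the BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT math: buffer_create multiplies the 16-byte-aligned size by _frame_count, and buffer_map hands out a rotating slice so the CPU never writes memory the GPU may still be reading. A worked sketch with hypothetical numbers:

// Sketch: persistent-dynamic buffer slices with _frame_count == 3.
uint64_t logical = round_up_to_alignment(requested_size, 16u); // e.g. 250 -> 256
uint64_t total = logical * 3; // 768 bytes of backing store

// Each frame maps a disjoint window; the frame index cycles 0, 1, 2, 0, ...
uint8_t *slice = (uint8_t *)metal_buffer.contents + frame_index * logical;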
- dispatch_semaphore_signal(fence->semaphore); - } else { - [cb addCompletedHandler:^(id buffer) { - dispatch_semaphore_signal(fence->semaphore); - }]; - } + fence->signal(cb); } for (uint32_t i = 0; i < p_swap_chains.size(); i++) { @@ -1133,18 +1116,27 @@ static const API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MTLSamplerBorderC } } +template +struct is_layout_compatible + : std::bool_constant< + sizeof(T) == sizeof(U) && + alignof(T) == alignof(U) && + std::is_trivially_copyable_v && + std::is_trivially_copyable_v> {}; +static_assert(is_layout_compatible::value, "UniformInfo::Indexes layout does not match RenderingShaderContainerMetal::UniformData::Indexes layout"); + API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) -static BindingInfo from_binding_info_data(const RenderingShaderContainerMetal::BindingInfoData &p_data) { - BindingInfo bi; - bi.dataType = static_cast(p_data.data_type); - bi.index = p_data.index; - bi.access = static_cast(p_data.access); - bi.usage = static_cast(p_data.usage); - bi.textureType = static_cast(p_data.texture_type); - bi.imageFormat = p_data.image_format; - bi.arrayLength = p_data.array_length; - bi.isMultisampled = p_data.is_multisampled; - return bi; +static void update_uniform_info(const RenderingShaderContainerMetal::UniformData &p_data, UniformInfo &r_ui) { + r_ui.active_stages = p_data.active_stages; + r_ui.dataType = static_cast(p_data.data_type); + memcpy(&r_ui.slot, &p_data.slot, sizeof(UniformInfo::Indexes)); + memcpy(&r_ui.arg_buffer, &p_data.arg_buffer, sizeof(UniformInfo::Indexes)); + r_ui.access = static_cast(p_data.access); + r_ui.usage = static_cast(p_data.usage); + r_ui.textureType = static_cast(p_data.texture_type); + r_ui.imageFormat = p_data.image_format; + r_ui.arrayLength = p_data.array_length; + r_ui.isMultisampled = p_data.is_multisampled; } RDD::ShaderID RenderingDeviceDriverMetal::shader_create_from_container(const Ref &p_shader_container, const Vector &p_immutable_samplers) { @@ -1156,13 +1148,12 @@ static BindingInfo from_binding_info_data(const RenderingShaderContainerMetal::B Vector &shaders = shader_container->shaders; Vector &mtl_shaders = shader_container->mtl_shaders; - // We need to regenerate the shader if the cache is moved to an incompatible device. - ERR_FAIL_COND_V_MSG(device_properties->features.argument_buffers_tier < MTLArgumentBuffersTier2 && mtl_reflection_data.uses_argument_buffers(), + // We need to regenerate the shader if the cache is moved to an incompatible device or argument buffer support differs. 
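The is_layout_compatible trait added above is what licenses the raw memcpy calls in update_uniform_info. A self-contained sketch of the check, where A and B are hypothetical stand-ins for UniformInfo::Indexes and RenderingShaderContainerMetal::UniformData::Indexes:

#include <cstdint>
#include <type_traits>

template <typename T, typename U>
struct is_layout_compatible
        : std::bool_constant<
                  sizeof(T) == sizeof(U) &&
                  alignof(T) == alignof(U) &&
                  std::is_trivially_copyable_v<T> &&
                  std::is_trivially_copyable_v<U>> {};

struct A { uint32_t buffer, texture, sampler; };
struct B { uint32_t buffer, texture, sampler; };
static_assert(is_layout_compatible<A, B>::value, "byte-wise copy would not be safe");

Note that the check is deliberately loose: it does not prove field-for-field equivalence, only that a byte-wise copy between the two types is well-defined.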
+ ERR_FAIL_COND_V_MSG(!device_properties->features.argument_buffers_supported() && mtl_reflection_data.uses_argument_buffers(), RDD::ShaderID(), "Shader was compiled with argument buffers enabled, but this device does not support them"); - uint32_t msl_version = make_msl_version(device_properties->features.mslVersionMajor, device_properties->features.mslVersionMinor); - ERR_FAIL_COND_V_MSG(msl_version < mtl_reflection_data.msl_version, + ERR_FAIL_COND_V_MSG(device_properties->features.msl_max_version < mtl_reflection_data.msl_version, RDD::ShaderID(), "Shader was compiled for a newer version of Metal"); @@ -1175,6 +1166,10 @@ static BindingInfo from_binding_info_data(const RenderingShaderContainerMetal::B uint32_t major = mtl_reflection_data.msl_version / 10000; uint32_t minor = (mtl_reflection_data.msl_version / 100) % 100; options.languageVersion = MTLLanguageVersion((major << 0x10) + minor); + if (@available(macOS 15.0, iOS 18.0, tvOS 18.0, visionOS 2.0, *)) { + options.enableLogging = mtl_reflection_data.needs_debug_logging(); + } + HashMap libraries; bool is_compute = false; @@ -1222,7 +1217,7 @@ static BindingInfo from_binding_info_data(const RenderingShaderContainerMetal::B data:binary]; } else { options.preserveInvariance = shader_data.is_position_invariant; -#if defined(VISIONOS_ENABLED) +#if __MAC_OS_X_VERSION_MIN_REQUIRED >= 150000 || __IPHONE_OS_VERSION_MIN_REQUIRED >= 180000 || __TV_OS_VERSION_MIN_REQUIRED >= 180000 || defined(VISIONOS_ENABLED) options.mathMode = MTLMathModeFast; #else options.fastMathEnabled = YES; @@ -1247,7 +1242,6 @@ static BindingInfo from_binding_info_data(const RenderingShaderContainerMetal::B DynamicOffsetLayout dynamic_offset_layout; uint8_t dynamic_offset = 0; - uint8_t dynamic_count = 0; // Create sets. for (uint32_t i = 0; i < uniform_sets_count; i++) { @@ -1257,6 +1251,8 @@ static BindingInfo from_binding_info_data(const RenderingShaderContainerMetal::B uint32_t set_size = mtl_set.size(); set.uniforms.resize(set_size); + uint8_t dynamic_count = 0; + LocalVector::Iterator iter = set.uniforms.begin(); for (uint32_t j = 0; j < set_size; j++) { const ShaderUniform &uniform = refl_set.ptr()[j]; @@ -1274,89 +1270,34 @@ static BindingInfo from_binding_info_data(const RenderingShaderContainerMetal::B UniformInfo &ui = *iter; ++iter; + update_uniform_info(bind, ui); ui.binding = uniform.binding; - ui.active_stages = static_cast(bind.active_stages); - for (const RSCM::BindingInfoData &info : bind.bindings) { - if (info.shader_stage == UINT32_MAX) { - continue; - } - BindingInfo bi = from_binding_info_data(info); - ui.bindings.insert((RDC::ShaderStage)info.shader_stage, bi); - } - for (const RSCM::BindingInfoData &info : bind.bindings_secondary) { - if (info.shader_stage == UINT32_MAX) { - continue; - } - BindingInfo bi = from_binding_info_data(info); - ui.bindings_secondary.insert((RDC::ShaderStage)info.shader_stage, bi); + if (ui.arg_buffer.texture == UINT32_MAX && ui.arg_buffer.buffer == UINT32_MAX && ui.arg_buffer.sampler == UINT32_MAX) { + // No bindings. + continue; } +#define VAL(x) (x == UINT32_MAX ? 0 : x) + uint32_t max = std::max({ VAL(ui.arg_buffer.texture), VAL(ui.arg_buffer.buffer), VAL(ui.arg_buffer.sampler) }); + max += ui.arrayLength > 0 ? 
ui.arrayLength - 1 : 0; + set.buffer_size = std::max(set.buffer_size, (max + 1) * (uint32_t)sizeof(uint64_t)); +#undef VAL } + if (dynamic_count > 0) { dynamic_offset_layout.set_offset_count(i, dynamic_offset, dynamic_count); dynamic_offset += dynamic_count; - dynamic_count = 0; - } - } - - for (uint32_t i = 0; i < uniform_sets_count; i++) { - UniformSet &set = uniform_sets.write[i]; - - // Make encoders. - for (RenderingShaderContainer::Shader const &shader : shaders) { - RD::ShaderStage stage = shader.shader_stage; - NSMutableArray *descriptors = [NSMutableArray new]; - - for (UniformInfo const &uniform : set.uniforms) { - BindingInfo const *binding_info = uniform.bindings.getptr(stage); - if (binding_info == nullptr) { - continue; - } - - [descriptors addObject:binding_info->new_argument_descriptor()]; - BindingInfo const *secondary_binding_info = uniform.bindings_secondary.getptr(stage); - if (secondary_binding_info != nullptr) { - [descriptors addObject:secondary_binding_info->new_argument_descriptor()]; - } - } - - if (descriptors.count == 0) { - // No bindings. - continue; - } - // Sort by index. - [descriptors sortUsingComparator:^NSComparisonResult(MTLArgumentDescriptor *a, MTLArgumentDescriptor *b) { - if (a.index < b.index) { - return NSOrderedAscending; - } else if (a.index > b.index) { - return NSOrderedDescending; - } else { - return NSOrderedSame; - } - }]; - - id enc = [device newArgumentEncoderWithArguments:descriptors]; - set.encoders[stage] = enc; - set.offsets[stage] = set.buffer_size; - set.buffer_size += enc.encodedLength; } } MDShader *shader = nullptr; if (is_compute) { - const RSCM::StageData &stage_data = mtl_shaders[0]; - MDComputeShader *cs = new MDComputeShader( shader_name, uniform_sets, mtl_reflection_data.uses_argument_buffers(), libraries[RD::ShaderStage::SHADER_STAGE_COMPUTE]); - if (stage_data.push_constant_binding != UINT32_MAX) { - cs->push_constants.size = refl.push_constant_size; - cs->push_constants.binding = stage_data.push_constant_binding; - } - cs->local = MTLSizeMake(refl.compute_local_size[0], refl.compute_local_size[1], refl.compute_local_size[2]); shader = cs; } else { @@ -1367,30 +1308,12 @@ static BindingInfo from_binding_info_data(const RenderingShaderContainerMetal::B mtl_reflection_data.uses_argument_buffers(), libraries[RD::ShaderStage::SHADER_STAGE_VERTEX], libraries[RD::ShaderStage::SHADER_STAGE_FRAGMENT]); - - for (uint32_t j = 0; j < shaders.size(); j++) { - const RSCM::StageData &stage_data = mtl_shaders[j]; - switch (shaders[j].shader_stage) { - case RD::ShaderStage::SHADER_STAGE_VERTEX: { - if (stage_data.push_constant_binding != UINT32_MAX) { - rs->push_constants.vert.size = refl.push_constant_size; - rs->push_constants.vert.binding = stage_data.push_constant_binding; - } - } break; - case RD::ShaderStage::SHADER_STAGE_FRAGMENT: { - if (stage_data.push_constant_binding != UINT32_MAX) { - rs->push_constants.frag.size = refl.push_constant_size; - rs->push_constants.frag.binding = stage_data.push_constant_binding; - } - } break; - default: { - ERR_FAIL_V_MSG(RDD::ShaderID(), "Invalid shader stage"); - } break; - } - } shader = rs; } + shader->push_constants.stages = refl.push_constant_stages; + shader->push_constants.size = refl.push_constant_size; + shader->push_constants.binding = mtl_reflection_data.push_constant_binding; shader->dynamic_offset_layout = dynamic_offset_layout; return RDD::ShaderID(shader); @@ -1412,7 +1335,135 @@ static BindingInfo from_binding_info_data(const RenderingShaderContainerMetal::B RDD::UniformSetID 
RenderingDeviceDriverMetal::uniform_set_create(VectorView p_uniforms, ShaderID p_shader, uint32_t p_set_index, int p_linear_pool_index) { //p_linear_pool_index = -1; // TODO:? Linear pools not implemented or not supported by API backend. + MDShader *shader = (MDShader *)(p_shader.id); + ERR_FAIL_INDEX_V_MSG(p_set_index, shader->sets.size(), UniformSetID(), "Set index out of range"); + const UniformSet &shader_set = shader->sets.get(p_set_index); MDUniformSet *set = memnew(MDUniformSet); + + if (device_properties->features.argument_buffers_supported()) { + // If argument buffers are enabled, we have already verified availability, so we can skip the runtime check. + GODOT_CLANG_WARNING_PUSH_AND_IGNORE("-Wunguarded-availability-new") + + set->arg_buffer = [device newBufferWithLength:shader_set.buffer_size options:MTLResourceStorageModeShared]; + uint64_t *ptr = (uint64_t *)set->arg_buffer.contents; + + HashMap bound_resources; + auto add_usage = [&bound_resources](MTLResourceUnsafe res, BitField stage, MTLResourceUsage usage) { + StageResourceUsage *sru = bound_resources.getptr(res); + if (sru == nullptr) { + sru = &bound_resources.insert(res, ResourceUnused)->value; + } + if (stage.has_flag(RDD::SHADER_STAGE_VERTEX_BIT)) { + *sru |= stage_resource_usage(RDD::SHADER_STAGE_VERTEX, usage); + } + if (stage.has_flag(RDD::SHADER_STAGE_FRAGMENT_BIT)) { + *sru |= stage_resource_usage(RDD::SHADER_STAGE_FRAGMENT, usage); + } + if (stage.has_flag(RDD::SHADER_STAGE_COMPUTE_BIT)) { + *sru |= stage_resource_usage(RDD::SHADER_STAGE_COMPUTE, usage); + } + }; + + // Ensure the argument buffer exists for this set as some shader pipelines may + // have been generated with argument buffers enabled. + for (uint32_t i = 0; i < p_uniforms.size(); i += 1) { + const BoundUniform &uniform = p_uniforms[i]; + const UniformInfo &ui = shader_set.uniforms[i]; + const UniformInfo::Indexes &idx = ui.arg_buffer; + + switch (uniform.type) { + case UNIFORM_TYPE_SAMPLER: { + size_t count = uniform.ids.size(); + for (size_t j = 0; j < count; j += 1) { + id sampler = rid::get(uniform.ids[j]); + *(MTLResourceID *)(ptr + idx.sampler + j) = sampler.gpuResourceID; + } + } break; + case UNIFORM_TYPE_SAMPLER_WITH_TEXTURE: { + uint32_t count = uniform.ids.size() / 2; + for (uint32_t j = 0; j < count; j += 1) { + id sampler = rid::get(uniform.ids[j * 2 + 0]); + id texture = rid::get(uniform.ids[j * 2 + 1]); + *(MTLResourceID *)(ptr + idx.texture + j) = texture.gpuResourceID; + *(MTLResourceID *)(ptr + idx.sampler + j) = sampler.gpuResourceID; + + add_usage(texture, ui.active_stages, ui.usage); + } + } break; + case UNIFORM_TYPE_TEXTURE: { + size_t count = uniform.ids.size(); + for (size_t j = 0; j < count; j += 1) { + id texture = rid::get(uniform.ids[j]); + *(MTLResourceID *)(ptr + idx.texture + j) = texture.gpuResourceID; + + add_usage(texture, ui.active_stages, ui.usage); + } + } break; + case UNIFORM_TYPE_IMAGE: { + size_t count = uniform.ids.size(); + for (size_t j = 0; j < count; j += 1) { + id texture = rid::get(uniform.ids[j]); + *(MTLResourceID *)(ptr + idx.texture + j) = texture.gpuResourceID; + add_usage(texture, ui.active_stages, ui.usage); + + if (idx.buffer != UINT32_MAX) { + // Emulated atomic image access. + id buffer = (texture.parentTexture ? 
texture.parentTexture : texture).buffer; + *(MTLGPUAddress *)(ptr + idx.buffer + j) = buffer.gpuAddress; + + add_usage(buffer, ui.active_stages, ui.usage); + } + } + } break; + case UNIFORM_TYPE_TEXTURE_BUFFER: { + ERR_PRINT("not implemented: UNIFORM_TYPE_TEXTURE_BUFFER"); + } break; + case UNIFORM_TYPE_SAMPLER_WITH_TEXTURE_BUFFER: { + ERR_PRINT("not implemented: UNIFORM_TYPE_SAMPLER_WITH_TEXTURE_BUFFER"); + } break; + case UNIFORM_TYPE_IMAGE_BUFFER: { + CRASH_NOW_MSG("not implemented: UNIFORM_TYPE_IMAGE_BUFFER"); + } break; + case UNIFORM_TYPE_STORAGE_BUFFER: + case UNIFORM_TYPE_UNIFORM_BUFFER: { + const BufferInfo *buffer = (const BufferInfo *)uniform.ids[0].id; + *(MTLGPUAddress *)(ptr + idx.buffer) = buffer->metal_buffer.gpuAddress; + + add_usage(buffer->metal_buffer, ui.active_stages, ui.usage); + } break; + case UNIFORM_TYPE_INPUT_ATTACHMENT: { + size_t count = uniform.ids.size(); + for (size_t j = 0; j < count; j += 1) { + id texture = rid::get(uniform.ids[j]); + *(MTLResourceID *)(ptr + idx.texture + j) = texture.gpuResourceID; + + add_usage(texture, ui.active_stages, ui.usage); + } + } break; + case UNIFORM_TYPE_UNIFORM_BUFFER_DYNAMIC: + case UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC: { + // Dynamic buffers are currently not supported by argument buffers, + // so we do not encode them; there shouldn't be any runtime shaders that use them. + } break; + default: { + DEV_ASSERT(false); + } + } + } + + for (KeyValue const &keyval : bound_resources) { + ResourceVector *resources = set->usage_to_resources.getptr(keyval.value); + if (resources == nullptr) { + resources = &set->usage_to_resources.insert(keyval.value, ResourceVector())->value; + } + int64_t pos = resources->span().bisect(keyval.key, true); + if (pos == resources->size() || (*resources)[pos] != keyval.key) { + resources->insert(pos, keyval.key); + } + } + + GODOT_CLANG_WARNING_POP + } Vector bound_uniforms; bound_uniforms.resize(p_uniforms.size()); for (uint32_t i = 0; i < p_uniforms.size(); i += 1) { @@ -1468,350 +1519,37 @@ static BindingInfo from_binding_info_data(const RenderingShaderContainerMetal::B void RenderingDeviceDriverMetal::command_clear_buffer(CommandBufferID p_cmd_buffer, BufferID p_buffer, uint64_t p_offset, uint64_t p_size) { MDCommandBuffer *cmd = (MDCommandBuffer *)(p_cmd_buffer.id); - - id blit = cmd->blit_command_encoder(); - [blit fillBuffer:((const BufferInfo *)p_buffer.id)->metal_buffer - range:NSMakeRange(p_offset, p_size) - value:0]; + cmd->clear_buffer(p_buffer, p_offset, p_size); } void RenderingDeviceDriverMetal::command_copy_buffer(CommandBufferID p_cmd_buffer, BufferID p_src_buffer, BufferID p_dst_buffer, VectorView p_regions) { MDCommandBuffer *cmd = (MDCommandBuffer *)(p_cmd_buffer.id); - const BufferInfo *src = (const BufferInfo *)p_src_buffer.id; - const BufferInfo *dst = (const BufferInfo *)p_dst_buffer.id; - - id blit = cmd->blit_command_encoder(); - - for (uint32_t i = 0; i < p_regions.size(); i++) { - BufferCopyRegion region = p_regions[i]; - [blit copyFromBuffer:src->metal_buffer - sourceOffset:region.src_offset - toBuffer:dst->metal_buffer - destinationOffset:region.dst_offset - size:region.size]; - } -} - -MTLSize MTLSizeFromVector3i(Vector3i p_size) { - return MTLSizeMake(p_size.x, p_size.y, p_size.z); -} - -MTLOrigin MTLOriginFromVector3i(Vector3i p_origin) { - return MTLOriginMake(p_origin.x, p_origin.y, p_origin.z); -} - -// Clamps the size so that the sum of the origin and size do not exceed the maximum size.
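The encoding loop above is the core of the new bindless path: with Tier 2 argument buffers, the set's buffer is a flat array of 8-byte slots, so no MTLArgumentEncoder round trip is needed. A condensed sketch of the addressing; the slot names here are hypothetical, the real indexes come from UniformData::Indexes reflection:

// Sketch: direct Tier 2 argument-buffer encoding.
id<MTLBuffer> arg_buffer = [device newBufferWithLength:slot_count * sizeof(uint64_t)
                                               options:MTLResourceStorageModeShared];
uint64_t *ptr = (uint64_t *)arg_buffer.contents;

// Textures and samplers are written as 8-byte MTLResourceID handles...
*(MTLResourceID *)(ptr + texture_slot) = texture.gpuResourceID;
*(MTLResourceID *)(ptr + sampler_slot) = sampler.gpuResourceID;
// ...and buffers as raw GPU virtual addresses.
*(uint64_t *)(ptr + buffer_slot) = some_buffer.gpuAddress;

Since writing handles neither retains resources nor makes them resident, the loop also folds every resource into usage_to_resources, which lets the command buffer declare residency for the whole set before it is consumed.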
-static inline MTLSize clampMTLSize(MTLSize p_size, MTLOrigin p_origin, MTLSize p_max_size) { - MTLSize clamped; - clamped.width = MIN(p_size.width, p_max_size.width - p_origin.x); - clamped.height = MIN(p_size.height, p_max_size.height - p_origin.y); - clamped.depth = MIN(p_size.depth, p_max_size.depth - p_origin.z); - return clamped; + cmd->copy_buffer(p_src_buffer, p_dst_buffer, p_regions); } void RenderingDeviceDriverMetal::command_copy_texture(CommandBufferID p_cmd_buffer, TextureID p_src_texture, TextureLayout p_src_texture_layout, TextureID p_dst_texture, TextureLayout p_dst_texture_layout, VectorView p_regions) { MDCommandBuffer *cmd = (MDCommandBuffer *)(p_cmd_buffer.id); - id src = rid::get(p_src_texture); - id dst = rid::get(p_dst_texture); - - id blit = cmd->blit_command_encoder(); - PixelFormats &pf = *pixel_formats; - - MTLPixelFormat src_fmt = src.pixelFormat; - bool src_is_compressed = pf.getFormatType(src_fmt) == MTLFormatType::Compressed; - MTLPixelFormat dst_fmt = dst.pixelFormat; - bool dst_is_compressed = pf.getFormatType(dst_fmt) == MTLFormatType::Compressed; - - // Validate copy. - if (src.sampleCount != dst.sampleCount || pf.getBytesPerBlock(src_fmt) != pf.getBytesPerBlock(dst_fmt)) { - ERR_FAIL_MSG("Cannot copy between incompatible pixel formats, such as formats of different pixel sizes, or between images with different sample counts."); - } - - // If source and destination have different formats and at least one is compressed, a temporary buffer is required. - bool need_tmp_buffer = (src_fmt != dst_fmt) && (src_is_compressed || dst_is_compressed); - if (need_tmp_buffer) { - ERR_FAIL_MSG("not implemented: copy with intermediate buffer"); - } - - if (src_fmt != dst_fmt) { - // Map the source pixel format to the dst through a texture view on the source texture. - src = [src newTextureViewWithPixelFormat:dst_fmt]; - } - - for (uint32_t i = 0; i < p_regions.size(); i++) { - TextureCopyRegion region = p_regions[i]; - - MTLSize extent = MTLSizeFromVector3i(region.size); - - // If copies can be performed using direct texture-texture copying, do so. - uint32_t src_level = region.src_subresources.mipmap; - uint32_t src_base_layer = region.src_subresources.base_layer; - MTLSize src_extent = mipmapLevelSizeFromTexture(src, src_level); - uint32_t dst_level = region.dst_subresources.mipmap; - uint32_t dst_base_layer = region.dst_subresources.base_layer; - MTLSize dst_extent = mipmapLevelSizeFromTexture(dst, dst_level); - - // All layers may be copied at once, if the extent completely covers both images. - if (src_extent == extent && dst_extent == extent) { - [blit copyFromTexture:src - sourceSlice:src_base_layer - sourceLevel:src_level - toTexture:dst - destinationSlice:dst_base_layer - destinationLevel:dst_level - sliceCount:region.src_subresources.layer_count - levelCount:1]; - } else { - MTLOrigin src_origin = MTLOriginFromVector3i(region.src_offset); - MTLSize src_size = clampMTLSize(extent, src_origin, src_extent); - uint32_t layer_count = 0; - if ((src.textureType == MTLTextureType3D) != (dst.textureType == MTLTextureType3D)) { - // In the case, the number of layers to copy is in extent.depth. Use that value, - // then clamp the depth, so we don't try to copy more than Metal will allow. 
- layer_count = extent.depth; - src_size.depth = 1; - } else { - layer_count = region.src_subresources.layer_count; - } - MTLOrigin dst_origin = MTLOriginFromVector3i(region.dst_offset); - - for (uint32_t layer = 0; layer < layer_count; layer++) { - // We can copy between a 3D and a 2D image easily. Just copy between - // one slice of the 2D image and one plane of the 3D image at a time. - if ((src.textureType == MTLTextureType3D) == (dst.textureType == MTLTextureType3D)) { - [blit copyFromTexture:src - sourceSlice:src_base_layer + layer - sourceLevel:src_level - sourceOrigin:src_origin - sourceSize:src_size - toTexture:dst - destinationSlice:dst_base_layer + layer - destinationLevel:dst_level - destinationOrigin:dst_origin]; - } else if (src.textureType == MTLTextureType3D) { - [blit copyFromTexture:src - sourceSlice:src_base_layer - sourceLevel:src_level - sourceOrigin:MTLOriginMake(src_origin.x, src_origin.y, src_origin.z + layer) - sourceSize:src_size - toTexture:dst - destinationSlice:dst_base_layer + layer - destinationLevel:dst_level - destinationOrigin:dst_origin]; - } else { - DEV_ASSERT(dst.textureType == MTLTextureType3D); - [blit copyFromTexture:src - sourceSlice:src_base_layer + layer - sourceLevel:src_level - sourceOrigin:src_origin - sourceSize:src_size - toTexture:dst - destinationSlice:dst_base_layer - destinationLevel:dst_level - destinationOrigin:MTLOriginMake(dst_origin.x, dst_origin.y, dst_origin.z + layer)]; - } - } - } - } + cmd->copy_texture(p_src_texture, p_dst_texture, p_regions); } void RenderingDeviceDriverMetal::command_resolve_texture(CommandBufferID p_cmd_buffer, TextureID p_src_texture, TextureLayout p_src_texture_layout, uint32_t p_src_layer, uint32_t p_src_mipmap, TextureID p_dst_texture, TextureLayout p_dst_texture_layout, uint32_t p_dst_layer, uint32_t p_dst_mipmap) { MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id); - id src_tex = rid::get(p_src_texture); - id dst_tex = rid::get(p_dst_texture); - - MTLRenderPassDescriptor *mtlRPD = [MTLRenderPassDescriptor renderPassDescriptor]; - MTLRenderPassColorAttachmentDescriptor *mtlColorAttDesc = mtlRPD.colorAttachments[0]; - mtlColorAttDesc.loadAction = MTLLoadActionLoad; - mtlColorAttDesc.storeAction = MTLStoreActionMultisampleResolve; - - mtlColorAttDesc.texture = src_tex; - mtlColorAttDesc.resolveTexture = dst_tex; - mtlColorAttDesc.level = p_src_mipmap; - mtlColorAttDesc.slice = p_src_layer; - mtlColorAttDesc.resolveLevel = p_dst_mipmap; - mtlColorAttDesc.resolveSlice = p_dst_layer; - cb->encodeRenderCommandEncoderWithDescriptor(mtlRPD, @"Resolve Image"); + cb->resolve_texture(p_src_texture, p_src_texture_layout, p_src_layer, p_src_mipmap, p_dst_texture, p_dst_texture_layout, p_dst_layer, p_dst_mipmap); } void RenderingDeviceDriverMetal::command_clear_color_texture(CommandBufferID p_cmd_buffer, TextureID p_texture, TextureLayout p_texture_layout, const Color &p_color, const TextureSubresourceRange &p_subresources) { MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id); - id src_tex = rid::get(p_texture); - - if (src_tex.parentTexture) { - // Clear via the parent texture rather than the view. 
- src_tex = src_tex.parentTexture; - } - - PixelFormats &pf = *pixel_formats; - - if (pf.isDepthFormat(src_tex.pixelFormat) || pf.isStencilFormat(src_tex.pixelFormat)) { - ERR_FAIL_MSG("invalid: depth or stencil texture format"); - } - - MTLRenderPassDescriptor *desc = MTLRenderPassDescriptor.renderPassDescriptor; - - if (p_subresources.aspect.has_flag(TEXTURE_ASPECT_COLOR_BIT)) { - MTLRenderPassColorAttachmentDescriptor *caDesc = desc.colorAttachments[0]; - caDesc.texture = src_tex; - caDesc.loadAction = MTLLoadActionClear; - caDesc.storeAction = MTLStoreActionStore; - caDesc.clearColor = MTLClearColorMake(p_color.r, p_color.g, p_color.b, p_color.a); - - // Extract the mipmap levels that are to be updated. - uint32_t mipLvlStart = p_subresources.base_mipmap; - uint32_t mipLvlCnt = p_subresources.mipmap_count; - uint32_t mipLvlEnd = mipLvlStart + mipLvlCnt; - - uint32_t levelCount = src_tex.mipmapLevelCount; - - // Extract the cube or array layers (slices) that are to be updated. - bool is3D = src_tex.textureType == MTLTextureType3D; - uint32_t layerStart = is3D ? 0 : p_subresources.base_layer; - uint32_t layerCnt = p_subresources.layer_count; - uint32_t layerEnd = layerStart + layerCnt; - - MetalFeatures const &features = (*device_properties).features; - - // Iterate across mipmap levels and layers, and perform and empty render to clear each. - for (uint32_t mipLvl = mipLvlStart; mipLvl < mipLvlEnd; mipLvl++) { - ERR_FAIL_INDEX_MSG(mipLvl, levelCount, "mip level out of range"); - - caDesc.level = mipLvl; - - // If a 3D image, we need to get the depth for each level. - if (is3D) { - layerCnt = mipmapLevelSizeFromTexture(src_tex, mipLvl).depth; - layerEnd = layerStart + layerCnt; - } - - if ((features.layeredRendering && src_tex.sampleCount == 1) || features.multisampleLayeredRendering) { - // We can clear all layers at once. 
- if (is3D) { - caDesc.depthPlane = layerStart; - } else { - caDesc.slice = layerStart; - } - desc.renderTargetArrayLength = layerCnt; - cb->encodeRenderCommandEncoderWithDescriptor(desc, @"Clear Image"); - } else { - for (uint32_t layer = layerStart; layer < layerEnd; layer++) { - if (is3D) { - caDesc.depthPlane = layer; - } else { - caDesc.slice = layer; - } - cb->encodeRenderCommandEncoderWithDescriptor(desc, @"Clear Image"); - } - } - } - } -} - -API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) -bool isArrayTexture(MTLTextureType p_type) { - return (p_type == MTLTextureType3D || - p_type == MTLTextureType2DArray || - p_type == MTLTextureType2DMultisampleArray || - p_type == MTLTextureType1DArray); -} - -void RenderingDeviceDriverMetal::_copy_texture_buffer(CommandBufferID p_cmd_buffer, - CopySource p_source, - TextureID p_texture, - BufferID p_buffer, - VectorView p_regions) { - MDCommandBuffer *cmd = (MDCommandBuffer *)(p_cmd_buffer.id); - const BufferInfo *buffer = (const BufferInfo *)p_buffer.id; - id texture = rid::get(p_texture); - - id enc = cmd->blit_command_encoder(); - - PixelFormats &pf = *pixel_formats; - MTLPixelFormat mtlPixFmt = texture.pixelFormat; - - MTLBlitOption options = MTLBlitOptionNone; - if (pf.isPVRTCFormat(mtlPixFmt)) { - options |= MTLBlitOptionRowLinearPVRTC; - } - - for (uint32_t i = 0; i < p_regions.size(); i++) { - BufferTextureCopyRegion region = p_regions[i]; - - uint32_t mip_level = region.texture_subresources.mipmap; - MTLOrigin txt_origin = MTLOriginMake(region.texture_offset.x, region.texture_offset.y, region.texture_offset.z); - MTLSize src_extent = mipmapLevelSizeFromTexture(texture, mip_level); - MTLSize txt_size = clampMTLSize(MTLSizeMake(region.texture_region_size.x, region.texture_region_size.y, region.texture_region_size.z), - txt_origin, - src_extent); - - uint32_t buffImgWd = region.texture_region_size.x; - uint32_t buffImgHt = region.texture_region_size.y; - - NSUInteger bytesPerRow = pf.getBytesPerRow(mtlPixFmt, buffImgWd); - NSUInteger bytesPerImg = pf.getBytesPerLayer(mtlPixFmt, bytesPerRow, buffImgHt); - - MTLBlitOption blit_options = options; - - if (pf.isDepthFormat(mtlPixFmt) && pf.isStencilFormat(mtlPixFmt)) { - bool want_depth = flags::all(region.texture_subresources.aspect, TEXTURE_ASPECT_DEPTH_BIT); - bool want_stencil = flags::all(region.texture_subresources.aspect, TEXTURE_ASPECT_STENCIL_BIT); - - // The stencil component is always 1 byte per pixel. - // Don't reduce depths of 32-bit depth/stencil formats. 
- if (want_depth && !want_stencil) { - if (pf.getBytesPerTexel(mtlPixFmt) != 4) { - bytesPerRow -= buffImgWd; - bytesPerImg -= buffImgWd * buffImgHt; - } - blit_options |= MTLBlitOptionDepthFromDepthStencil; - } else if (want_stencil && !want_depth) { - bytesPerRow = buffImgWd; - bytesPerImg = buffImgWd * buffImgHt; - blit_options |= MTLBlitOptionStencilFromDepthStencil; - } - } - - if (!isArrayTexture(texture.textureType)) { - bytesPerImg = 0; - } - - if (p_source == CopySource::Buffer) { - for (uint32_t lyrIdx = 0; lyrIdx < region.texture_subresources.layer_count; lyrIdx++) { - [enc copyFromBuffer:buffer->metal_buffer - sourceOffset:region.buffer_offset + (bytesPerImg * lyrIdx) - sourceBytesPerRow:bytesPerRow - sourceBytesPerImage:bytesPerImg - sourceSize:txt_size - toTexture:texture - destinationSlice:region.texture_subresources.base_layer + lyrIdx - destinationLevel:mip_level - destinationOrigin:txt_origin - options:blit_options]; - } - } else { - for (uint32_t lyrIdx = 0; lyrIdx < region.texture_subresources.layer_count; lyrIdx++) { - [enc copyFromTexture:texture - sourceSlice:region.texture_subresources.base_layer + lyrIdx - sourceLevel:mip_level - sourceOrigin:txt_origin - sourceSize:txt_size - toBuffer:buffer->metal_buffer - destinationOffset:region.buffer_offset + (bytesPerImg * lyrIdx) - destinationBytesPerRow:bytesPerRow - destinationBytesPerImage:bytesPerImg - options:blit_options]; - } - } - } + cb->clear_color_texture(p_texture, p_texture_layout, p_color, p_subresources); } void RenderingDeviceDriverMetal::command_copy_buffer_to_texture(CommandBufferID p_cmd_buffer, BufferID p_src_buffer, TextureID p_dst_texture, TextureLayout p_dst_texture_layout, VectorView p_regions) { - _copy_texture_buffer(p_cmd_buffer, CopySource::Buffer, p_dst_texture, p_src_buffer, p_regions); + MDCommandBuffer *cmd = (MDCommandBuffer *)(p_cmd_buffer.id); + cmd->copy_buffer_to_texture(p_src_buffer, p_dst_texture, p_regions); } void RenderingDeviceDriverMetal::command_copy_texture_to_buffer(CommandBufferID p_cmd_buffer, TextureID p_src_texture, TextureLayout p_src_texture_layout, BufferID p_dst_buffer, VectorView p_regions) { - _copy_texture_buffer(p_cmd_buffer, CopySource::Texture, p_src_texture, p_dst_buffer, p_regions); + MDCommandBuffer *cmd = (MDCommandBuffer *)(p_cmd_buffer.id); + cmd->copy_texture_to_buffer(p_src_texture, p_dst_buffer, p_regions); } #pragma mark - Pipeline @@ -2558,13 +2296,12 @@ bool isArrayTexture(MTLTextureType p_type) { void RenderingDeviceDriverMetal::command_begin_label(CommandBufferID p_cmd_buffer, const char *p_label_name, const Color &p_color) { MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id); - NSString *s = [[NSString alloc] initWithBytesNoCopy:(void *)p_label_name length:strlen(p_label_name) encoding:NSUTF8StringEncoding freeWhenDone:NO]; - [cb->get_command_buffer() pushDebugGroup:s]; + cb->begin_label(p_label_name, p_color); } void RenderingDeviceDriverMetal::command_end_label(CommandBufferID p_cmd_buffer) { MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id); - [cb->get_command_buffer() popDebugGroup]; + cb->end_label(); } #pragma mark - Debug @@ -2576,8 +2313,8 @@ bool isArrayTexture(MTLTextureType p_type) { #pragma mark - Submission void RenderingDeviceDriverMetal::begin_segment(uint32_t p_frame_index, uint32_t p_frames_drawn) { - frame_index = p_frame_index; - frames_drawn = p_frames_drawn; + _frame_index = p_frame_index; + _frames_drawn = p_frames_drawn; } void RenderingDeviceDriverMetal::end_segment() { @@ -2612,9 +2349,7 @@ bool 
isArrayTexture(MTLTextureType p_type) { } break; case OBJECT_TYPE_UNIFORM_SET: { MDUniformSet *set = (MDUniformSet *)(p_driver_id.id); - for (KeyValue &keyval : set->bound_uniforms) { - keyval.value.buffer.label = [NSString stringWithUTF8String:p_name.utf8().get_data()]; - } + set->arg_buffer.label = [NSString stringWithUTF8String:p_name.utf8().get_data()]; } break; case OBJECT_TYPE_PIPELINE: { // Can't set label after creation. @@ -2804,7 +2539,9 @@ bool isArrayTexture(MTLTextureType p_type) { uint64_t RenderingDeviceDriverMetal::api_trait_get(ApiTrait p_trait) { switch (p_trait) { case API_TRAIT_HONORS_PIPELINE_BARRIERS: - return 0; + return false; + case API_TRAIT_CLEARS_WITH_COPY_ENGINE: + return false; default: return RenderingDeviceDriver::api_trait_get(p_trait); } @@ -2925,31 +2662,26 @@ bool isArrayTexture(MTLTextureType p_type) { void RenderingDeviceDriverMetal::_check_capabilities() { capabilities.device_family = DEVICE_METAL; - capabilities.version_major = device_properties->features.mslVersionMajor; - capabilities.version_minor = device_properties->features.mslVersionMinor; + parse_msl_version(device_properties->features.msl_target_version, capabilities.version_major, capabilities.version_minor); } API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) static MetalDeviceProfile device_profile_from_properties(MetalDeviceProperties *p_device_properties) { using DP = MetalDeviceProfile; + NSOperatingSystemVersion os_version = NSProcessInfo.processInfo.operatingSystemVersion; MetalDeviceProfile res; + res.min_os_version = MinOsVersion(os_version.majorVersion, os_version.minorVersion, os_version.patchVersion); #if TARGET_OS_OSX res.platform = DP::Platform::macOS; - res.features = { - .mslVersionMajor = p_device_properties->features.mslVersionMajor, - .mslVersionMinor = p_device_properties->features.mslVersionMinor, - .argument_buffers_tier = DP::ArgumentBuffersTier::Tier2, - .simdPermute = true - }; #else res.platform = DP::Platform::iOS; +#endif res.features = { - .mslVersionMajor = p_device_properties->features.mslVersionMajor, - .mslVersionMinor = p_device_properties->features.mslVersionMinor, - .argument_buffers_tier = p_device_properties->features.argument_buffers_tier == MTLArgumentBuffersTier1 ? DP::ArgumentBuffersTier::Tier1 : DP::ArgumentBuffersTier::Tier2, + .msl_version = p_device_properties->features.msl_target_version, + .use_argument_buffers = p_device_properties->features.argument_buffers_enabled(), .simdPermute = p_device_properties->features.simdPermute, }; -#endif + // highestFamily will only be set to an Apple GPU family switch (p_device_properties->features.highestFamily) { case MTLGPUFamilyApple1: @@ -2985,8 +2717,6 @@ static MetalDeviceProfile device_profile_from_properties(MetalDeviceProperties * } break; } - res.update_options(); - return res; } @@ -3001,7 +2731,7 @@ static MetalDeviceProfile device_profile_from_properties(MetalDeviceProperties * _check_capabilities(); - frame_count = p_frame_count; + _frame_count = p_frame_count; // Set the pipeline cache ID based on the Metal version. 
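_check_capabilities above now round-trips through the packed MSL version encoding defined at the top of this patch; the arithmetic, worked out:

// make_msl_version packs (major, minor, patch) as major * 10000 + minor * 100 + patch.
static_assert(make_msl_version(3, 2) == 30200); // MSL_VERSION_32
static_assert(make_msl_version(4, 0) == 40000); // MSL_VERSION_40

// parse_msl_version inverts the packing, discarding the patch component.
uint32_t major = 0, minor = 0;
parse_msl_version(30201, major, minor); // major == 3, minor == 2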
pipeline_cache_id = "metal-driver-" + get_api_version(); diff --git a/drivers/metal/rendering_shader_container_metal.h b/drivers/metal/rendering_shader_container_metal.h index b85cfad9e379..24af5783db4c 100644 --- a/drivers/metal/rendering_shader_container_metal.h +++ b/drivers/metal/rendering_shader_container_metal.h @@ -30,6 +30,7 @@ #pragma once +#import "metal_device_profile.h" #import "sha256_digest.h" #import "servers/rendering/rendering_device_driver.h" @@ -41,92 +42,6 @@ const uint32_t VIEW_MASK_BUFFER_INDEX = 24; class RenderingShaderContainerFormatMetal; -class MinOsVersion { - uint32_t version; - -public: - String to_compiler_os_version() const; - bool is_null() const { return version == UINT32_MAX; } - bool is_valid() const { return version != UINT32_MAX; } - - MinOsVersion(const String &p_version); - explicit MinOsVersion(uint32_t p_version) : - version(p_version) {} - MinOsVersion() : - version(UINT32_MAX) {} - - bool operator>(uint32_t p_other) { - return version > p_other; - } -}; - -/// @brief A minimal structure that defines a device profile for Metal. -/// -/// This structure is used by the `RenderingShaderContainerMetal` class to -/// determine options for compiling SPIR-V to Metal source. It currently only -/// contains the minimum properties required to transform shaders from SPIR-V to Metal -/// and potentially compile to a `.metallib`. -struct MetalDeviceProfile { - enum class Platform : uint32_t { - macOS = 0, - iOS = 1, - }; - - /*! @brief The GPU family. - * - * NOTE: These values match Apple's MTLGPUFamily - */ - enum class GPU : uint32_t { - Apple1 = 1001, - Apple2 = 1002, - Apple3 = 1003, - Apple4 = 1004, - Apple5 = 1005, - Apple6 = 1006, - Apple7 = 1007, - Apple8 = 1008, - Apple9 = 1009, - }; - - enum class ArgumentBuffersTier : uint32_t { - Tier1 = 0, - Tier2 = 1, - }; - - struct Features { - uint32_t mslVersionMajor = 0; - uint32_t mslVersionMinor = 0; - ArgumentBuffersTier argument_buffers_tier = ArgumentBuffersTier::Tier1; - bool simdPermute = false; - }; - - /** - * @brief Options to configure the Metal device profile. - * - * This structure allows customization of the Metal device profile, - * such as the argument buffers tier, which can affect how shaders are compiled. - */ - struct Options { - ArgumentBuffersTier argument_buffers_tier = ArgumentBuffersTier::Tier1; - }; - - Platform platform = Platform::macOS; - GPU gpu = GPU::Apple4; - Features features; - Options options; - - static const MetalDeviceProfile *get_profile(Platform p_platform, GPU p_gpu); - - // Configure any options for the device profile, which may include overrides from the environment. - void update_options(); - - MetalDeviceProfile() = default; - -private: - static Mutex profiles_lock; ///< Mutex to protect access to the profiles map. - static HashMap profiles; -}; - class RenderingShaderContainerMetal : public RenderingShaderContainer { GDSOFTCLASS(RenderingShaderContainerMetal, RenderingShaderContainer); @@ -136,6 +51,7 @@ class RenderingShaderContainerMetal : public RenderingShaderContainer { NONE = 0, NEEDS_VIEW_MASK_BUFFER = 1 << 0, USES_ARGUMENT_BUFFERS = 1 << 1, + NEEDS_DEBUG_LOGGING = 1 << 2, }; /// The base profile that was used to generate this shader. @@ -152,6 +68,7 @@ class RenderingShaderContainerMetal : public RenderingShaderContainer { */ MinOsVersion os_min_version; uint32_t flags = NONE; + uint32_t push_constant_binding = UINT32_MAX; ///< Metal binding slot for the push constant data /// @brief Returns `true` if the shader is compiled with multi-view support. 
bool needs_view_mask_buffer() const { @@ -178,6 +95,19 @@ class RenderingShaderContainerMetal : public RenderingShaderContainer { flags &= ~USES_ARGUMENT_BUFFERS; } } + + /// Returns `true` if the shader was compiled with the GL_EXT_debug_printf extension enabled. + bool needs_debug_logging() const { + return flags & NEEDS_DEBUG_LOGGING; + } + + void set_needs_debug_logging(bool p_value) { + if (p_value) { + flags |= NEEDS_DEBUG_LOGGING; + } else { + flags &= ~NEEDS_DEBUG_LOGGING; + } + } }; struct StageData { @@ -187,69 +117,42 @@ class RenderingShaderContainerMetal : public RenderingShaderContainer { SHA256Digest hash; ///< SHA 256 hash of the shader code uint32_t source_size = 0; ///< size of the source code in the returned bytes uint32_t library_size = 0; ///< size of the compiled library in the returned bytes, 0 if it is not compiled - uint32_t push_constant_binding = UINT32_MAX; ///< Metal binding slot for the push constant data }; - struct BindingInfoData { - uint32_t shader_stage = UINT32_MAX; ///< The shader stage this binding is used in, or UINT32_MAX if not used. + struct UniformData { + uint32_t active_stages = 0; + uint32_t uniform_type = 0; // UniformType uint32_t data_type = 0; // MTLDataTypeNone - uint32_t index = 0; uint32_t access = 0; // MTLBindingAccessReadOnly uint32_t usage = 0; // MTLResourceUsage (none) uint32_t texture_type = 2; // MTLTextureType2D uint32_t image_format = 0; uint32_t array_length = 0; uint32_t is_multisampled = 0; - }; - struct UniformData { - /// Specifies the index into the `bindings` array for the shader stage. - /// - /// For example, a vertex and fragment shader use slots 0 and 1 of the bindings and bindings_secondary arrays. - static constexpr uint32_t STAGE_INDEX[RenderingDeviceCommons::SHADER_STAGE_MAX] = { - 0, // SHADER_STAGE_VERTEX - 1, // SHADER_STAGE_FRAGMENT - 0, // SHADER_STAGE_TESSELATION_CONTROL - 1, // SHADER_STAGE_TESSELATION_EVALUATION - 0, // SHADER_STAGE_COMPUTE + struct Indexes { + uint32_t buffer = UINT32_MAX; + uint32_t texture = UINT32_MAX; + uint32_t sampler = UINT32_MAX; }; + Indexes slot; + Indexes arg_buffer; - /// Specifies the stages the uniform data is - /// used by the Metal shader. - uint32_t active_stages = 0; - /// The primary binding information for the uniform data. - /// - /// A maximum of two stages is expected for any given pipeline, such as a vertex and fragment, so - /// the array size is fixed to 2. - BindingInfoData bindings[2]; - /// The secondary binding information for the uniform data. 
- /// - /// This is typically a sampler for an image-sampler uniform - BindingInfoData bindings_secondary[2]; - - _FORCE_INLINE_ constexpr uint32_t get_index_for_stage(RenderingDeviceCommons::ShaderStage p_stage) const { - return STAGE_INDEX[p_stage]; - } - - _FORCE_INLINE_ BindingInfoData &get_binding_for_stage(RenderingDeviceCommons::ShaderStage p_stage) { - BindingInfoData &info = bindings[get_index_for_stage(p_stage)]; - DEV_ASSERT(info.shader_stage == UINT32_MAX || info.shader_stage == p_stage); // make sure this uniform isn't used in the other stage - info.shader_stage = p_stage; - return info; - } + enum class IndexType { + SLOT, + ARG, + }; - _FORCE_INLINE_ BindingInfoData &get_secondary_binding_for_stage(RenderingDeviceCommons::ShaderStage p_stage) { - BindingInfoData &info = bindings_secondary[get_index_for_stage(p_stage)]; - DEV_ASSERT(info.shader_stage == UINT32_MAX || info.shader_stage == p_stage); // make sure this uniform isn't used in the other stage - info.shader_stage = p_stage; - return info; + _FORCE_INLINE_ Indexes &get_indexes(IndexType p_type) { + switch (p_type) { + case IndexType::SLOT: + return slot; + case IndexType::ARG: + return arg_buffer; + } } }; - struct SpecializationData { - uint32_t used_stages = 0; - }; - HeaderData mtl_reflection_data; // compliment to reflection_data Vector mtl_shaders; // compliment to shaders @@ -269,23 +172,19 @@ class RenderingShaderContainerMetal : public RenderingShaderContainer { private: const MetalDeviceProfile *device_profile = nullptr; bool export_mode = false; - MinOsVersion min_os_version; Vector mtl_reflection_binding_set_uniforms_data; // compliment to reflection_binding_set_uniforms_data - Vector mtl_reflection_specialization_data; // compliment to reflection_specialization_data Error compile_metal_source(const char *p_source, const StageData &p_stage_data, Vector &r_binary_data); public: - static constexpr uint32_t FORMAT_VERSION = 1; + static constexpr uint32_t FORMAT_VERSION = 2; void set_export_mode(bool p_export_mode) { export_mode = p_export_mode; } void set_device_profile(const MetalDeviceProfile *p_device_profile) { device_profile = p_device_profile; } - void set_min_os_version(const MinOsVersion p_min_os_version) { min_os_version = p_min_os_version; } struct MetalShaderReflection { Vector> uniform_sets; - Vector specialization_constants; }; MetalShaderReflection get_metal_shader_reflection() const; @@ -294,24 +193,20 @@ class RenderingShaderContainerMetal : public RenderingShaderContainer { virtual uint32_t _from_bytes_reflection_extra_data(const uint8_t *p_bytes) override; virtual uint32_t _from_bytes_reflection_binding_uniform_extra_data_start(const uint8_t *p_bytes) override; virtual uint32_t _from_bytes_reflection_binding_uniform_extra_data(const uint8_t *p_bytes, uint32_t p_index) override; - virtual uint32_t _from_bytes_reflection_specialization_extra_data_start(const uint8_t *p_bytes) override; - virtual uint32_t _from_bytes_reflection_specialization_extra_data(const uint8_t *p_bytes, uint32_t p_index) override; virtual uint32_t _from_bytes_shader_extra_data_start(const uint8_t *p_bytes) override; virtual uint32_t _from_bytes_shader_extra_data(const uint8_t *p_bytes, uint32_t p_index) override; virtual uint32_t _to_bytes_reflection_extra_data(uint8_t *p_bytes) const override; virtual uint32_t _to_bytes_reflection_binding_uniform_extra_data(uint8_t *p_bytes, uint32_t p_index) const override; - virtual uint32_t _to_bytes_reflection_specialization_extra_data(uint8_t *p_bytes, uint32_t p_index) const 
override; virtual uint32_t _to_bytes_shader_extra_data(uint8_t *p_bytes, uint32_t p_index) const override; virtual uint32_t _format() const override; virtual uint32_t _format_version() const override; - virtual bool _set_code_from_spirv(Span p_spirv) override; + virtual bool _set_code_from_spirv(const ReflectShader &p_shader) override; }; class RenderingShaderContainerFormatMetal : public RenderingShaderContainerFormat { bool export_mode = false; - MinOsVersion min_os_version; const MetalDeviceProfile *device_profile = nullptr; @@ -319,6 +214,6 @@ class RenderingShaderContainerFormatMetal : public RenderingShaderContainerForma virtual Ref create_container() const override; virtual ShaderLanguageVersion get_shader_language_version() const override; virtual ShaderSpirvVersion get_shader_spirv_version() const override; - RenderingShaderContainerFormatMetal(const MetalDeviceProfile *p_device_profile, bool p_export = false, const MinOsVersion p_min_os_version = MinOsVersion()); + RenderingShaderContainerFormatMetal(const MetalDeviceProfile *p_device_profile, bool p_export = false); virtual ~RenderingShaderContainerFormatMetal() = default; }; diff --git a/drivers/metal/rendering_shader_container_metal.mm b/drivers/metal/rendering_shader_container_metal.mm index 596d839f7929..6fc95e08f03e 100644 --- a/drivers/metal/rendering_shader_container_metal.mm +++ b/drivers/metal/rendering_shader_container_metal.mm @@ -34,85 +34,16 @@ #import "core/io/file_access.h" #import "core/io/marshalls.h" +#import "core/templates/fixed_vector.h" #import "servers/rendering/rendering_device.h" +#include "thirdparty/spirv-reflect/spirv_reflect.h" + #import #import #import #import -Mutex MetalDeviceProfile::profiles_lock; -HashMap MetalDeviceProfile::profiles; - -const MetalDeviceProfile *MetalDeviceProfile::get_profile(MetalDeviceProfile::Platform p_platform, MetalDeviceProfile::GPU p_gpu) { - DEV_ASSERT(p_platform == Platform::macOS || p_platform == Platform::iOS); - - MutexLock lock(profiles_lock); - - uint32_t key = (uint32_t)p_platform << 16 | (uint32_t)p_gpu; - if (MetalDeviceProfile *profile = profiles.getptr(key)) { - return profile; - } - - MetalDeviceProfile res; - res.platform = p_platform; - res.gpu = p_gpu; - if (p_platform == Platform::macOS) { - res.features.mslVersionMajor = 3; - res.features.mslVersionMinor = 2; - res.features.argument_buffers_tier = ArgumentBuffersTier::Tier2; - res.features.simdPermute = true; - } else if (p_platform == Platform::iOS) { - switch (p_gpu) { - case GPU::Apple1: - case GPU::Apple2: - case GPU::Apple3: - case GPU::Apple4: - case GPU::Apple5: { - res.features.simdPermute = false; - res.features.argument_buffers_tier = ArgumentBuffersTier::Tier1; - } break; - case GPU::Apple6: - case GPU::Apple7: - case GPU::Apple8: - case GPU::Apple9: { - res.features.argument_buffers_tier = ArgumentBuffersTier::Tier2; - res.features.simdPermute = true; - } break; - } - res.features.mslVersionMajor = 3; - res.features.mslVersionMinor = 1; - } - - res.update_options(); - - return &profiles.insert(key, res)->value; -} - -void MetalDeviceProfile::update_options() { - options.argument_buffers_tier = features.argument_buffers_tier; - - if (OS::get_singleton()->has_environment(U"GODOT_MTL_ARGUMENT_BUFFERS_TIER")) { - uint64_t tier = OS::get_singleton()->get_environment(U"GODOT_MTL_ARGUMENT_BUFFERS_TIER").to_int(); - switch (tier) { - case 1: - // All devices support tier 1 argument buffers. 
- options.argument_buffers_tier = ArgumentBuffersTier::Tier1; - break; - case 2: - if (features.argument_buffers_tier >= ArgumentBuffersTier::Tier2) { - options.argument_buffers_tier = ArgumentBuffersTier::Tier2; - } else { - WARN_PRINT("Current device does not support tier 2 argument buffers, leaving as default."); - } - break; - default: - WARN_PRINT(vformat("Invalid value for GODOT_MTL_ARGUMENT_BUFFER_TIER: %d. Falling back to device default.", tier)); - break; - } - } -} - void RenderingShaderContainerMetal::_initialize_toolchain_properties() { if (compiler_props.is_valid()) { return; @@ -126,21 +57,25 @@ case MetalDeviceProfile::Platform::iOS: sdk = "iphoneos"; break; + case MetalDeviceProfile::Platform::visionOS: + sdk = "xros"; + break; } Vector parts{ "echo", R"("")", "|", "/usr/bin/xcrun", "-sdk", sdk, "metal", "-E", "-dM", "-x", "metal" }; - // Compile metal shaders for the minimum supported target instead of the host machine - if (min_os_version.is_valid()) { - switch (device_profile->platform) { - case MetalDeviceProfile::Platform::macOS: { - parts.push_back("-mmacosx-version-min=" + min_os_version.to_compiler_os_version()); - break; - } - case MetalDeviceProfile::Platform::iOS: { - parts.push_back("-mios-version-min=" + min_os_version.to_compiler_os_version()); - break; - } + switch (device_profile->platform) { + case MetalDeviceProfile::Platform::macOS: { + parts.push_back("-mtargetos=macos" + device_profile->min_os_version.to_compiler_os_version()); + break; + } + case MetalDeviceProfile::Platform::iOS: { + parts.push_back("-mtargetos=ios" + device_profile->min_os_version.to_compiler_os_version()); + break; + } + case MetalDeviceProfile::Platform::visionOS: { + parts.push_back("-mtargetos=xros" + device_profile->min_os_version.to_compiler_os_version()); + break; } } @@ -205,6 +140,9 @@ case MetalDeviceProfile::Platform::iOS: sdk = "iphoneos"; break; + case MetalDeviceProfile::Platform::visionOS: + sdk = "xros"; + break; } // Build the .metallib binary. @@ -212,19 +150,19 @@ List args{ "-sdk", sdk, "metal", "-O3" }; // Compile metal shaders for the minimum supported target instead of the host machine. 
- if (min_os_version.is_valid()) { - switch (device_profile->platform) { - case MetalDeviceProfile::Platform::macOS: { - args.push_back("-mmacosx-version-min=" + min_os_version.to_compiler_os_version()); - break; - } - case MetalDeviceProfile::Platform::iOS: { - args.push_back("-mios-version-min=" + min_os_version.to_compiler_os_version()); - break; - } + switch (device_profile->platform) { + case MetalDeviceProfile::Platform::macOS: { + args.push_back("-mtargetos=macos" + device_profile->min_os_version.to_compiler_os_version()); + break; + } + case MetalDeviceProfile::Platform::iOS: { + args.push_back("-mtargetos=ios" + device_profile->min_os_version.to_compiler_os_version()); + break; + } + case MetalDeviceProfile::Platform::visionOS: { + args.push_back("-mtargetos=xros" + device_profile->min_os_version.to_compiler_os_version()); + break; } - } else { - WARN_PRINT_ONCE(vformat("Minimum target OS version is not set, so baking shaders for Metal will target the default version of your toolchain: %s", compiler_props.os_version_min_required.to_compiler_os_version())); } if (p_stage_data.is_position_invariant) { @@ -279,11 +217,32 @@ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wunguarded-availability" -bool RenderingShaderContainerMetal::_set_code_from_spirv(Span p_spirv) { +static spv::ExecutionModel SHADER_STAGE_REMAP[RDD::SHADER_STAGE_MAX] = { + [RDD::SHADER_STAGE_VERTEX] = spv::ExecutionModelVertex, + [RDD::SHADER_STAGE_FRAGMENT] = spv::ExecutionModelFragment, + [RDD::SHADER_STAGE_TESSELATION_CONTROL] = spv::ExecutionModelTessellationControl, + [RDD::SHADER_STAGE_TESSELATION_EVALUATION] = spv::ExecutionModelTessellationEvaluation, + [RDD::SHADER_STAGE_COMPUTE] = spv::ExecutionModelGLCompute, +}; + +spv::ExecutionModel get_stage(uint32_t p_stages_mask, RDD::ShaderStage p_stage) { + if (p_stages_mask & (1 << p_stage)) { + return SHADER_STAGE_REMAP[p_stage]; + } + return spv::ExecutionModel::ExecutionModelMax; +} + +spv::ExecutionModel map_stage(RDD::ShaderStage p_stage) { + return SHADER_STAGE_REMAP[p_stage]; +} + +bool RenderingShaderContainerMetal::_set_code_from_spirv(const ReflectShader &p_shader) { using namespace spirv_cross; using spirv_cross::CompilerMSL; using spirv_cross::Resource; + const LocalVector &p_spirv = p_shader.shader_stages; + if (export_mode) { _initialize_toolchain_properties(); } @@ -292,26 +251,10 @@ shaders.resize(p_spirv.size()); mtl_shaders.resize(p_spirv.size()); mtl_reflection_binding_set_uniforms_data.resize(reflection_binding_set_uniforms_data.size()); - mtl_reflection_specialization_data.resize(reflection_specialization_data.size()); mtl_reflection_data.set_needs_view_mask_buffer(reflection_data.has_multiview); mtl_reflection_data.profile = *device_profile; - // set_indexes will contain the starting offsets of each descriptor set in the binding set uniforms data - // including the last one, which is the size of reflection_binding_set_uniforms_count. - LocalVector set_indexes; - uint32_t set_indexes_size = reflection_binding_set_uniforms_count.size() + 1; - { - // calculate the starting offsets of each descriptor set in the binding set uniforms data - uint32_t size = reflection_binding_set_uniforms_count.size(); - set_indexes.resize(set_indexes_size); - uint32_t offset = 0; - for (uint32_t i = 0; i < size; i++) { - set_indexes[i] = offset; - offset += reflection_binding_set_uniforms_count.get(i); - } - set_indexes[set_indexes_size - 1] = offset; - } CompilerMSL::Options msl_options{}; // Determine Metal language version. 
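The two hunks above drop the optional -m*-version-min handling in favor of always passing a -mtargetos= flag derived from the device profile, so the preprocessor probe and the .metallib build target the same OS floor, with visionOS mapped to the xros SDK. The repeated switch can be pictured as a small helper (a hypothetical sketch, not code from the patch; make_target_os_flag is an illustrative name, and it assumes MinOsVersion::to_compiler_os_version() yields a string such as "14.0"):

// Sketch only: mirrors the per-platform -mtargetos selection used by both
// toolchain code paths above.
static String make_target_os_flag(const MetalDeviceProfile &p_profile) {
	const String version = p_profile.min_os_version.to_compiler_os_version();
	switch (p_profile.platform) {
		case MetalDeviceProfile::Platform::macOS:
			return "-mtargetos=macos" + version;
		case MetalDeviceProfile::Platform::iOS:
			return "-mtargetos=ios" + version;
		case MetalDeviceProfile::Platform::visionOS:
			return "-mtargetos=xros" + version;
	}
	return String(); // Unreachable: all platforms are enumerated above.
}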
@@ -322,7 +265,7 @@ msl_version = compiler_props.metal_version; mtl_reflection_data.os_min_version = compiler_props.os_version_min_required; } else { - msl_version = make_msl_version(device_profile->features.mslVersionMajor, device_profile->features.mslVersionMinor); + msl_version = device_profile->features.msl_version; mtl_reflection_data.os_min_version = MinOsVersion(); } uint32_t msl_ver_maj = 0; @@ -339,9 +282,12 @@ msl_options.ios_support_base_vertex_instance = true; } + // We don't currently allow argument buffers when using dynamic buffers as + // the current implementation does not update the argument buffer each time + // the dynamic buffer changes. This is a future TODO. bool argument_buffers_allowed = get_shader_reflection().has_dynamic_buffers == false; - if (device_profile->options.argument_buffers_tier >= MetalDeviceProfile::ArgumentBuffersTier::Tier2 && argument_buffers_allowed) { + if (device_profile->features.use_argument_buffers && argument_buffers_allowed) { msl_options.argument_buffers_tier = CompilerMSL::Options::ArgumentBuffersTier::Tier2; msl_options.argument_buffers = true; mtl_reflection_data.set_uses_argument_buffers(true); @@ -352,8 +298,7 @@ mtl_reflection_data.set_uses_argument_buffers(false); } msl_options.force_active_argument_buffer_resources = true; - // We can't use this, as we have to add the descriptor sets via compiler.add_msl_resource_binding. - // msl_options.pad_argument_buffer_resources = true; + msl_options.pad_argument_buffer_resources = true; msl_options.texture_buffer_native = true; // Enable texture buffer support. msl_options.use_framebuffer_fetch_subpasses = false; msl_options.pad_fragment_output_components = true; @@ -364,7 +309,7 @@ msl_options.multiview_layered_rendering = true; msl_options.view_mask_buffer_index = VIEW_MASK_BUFFER_INDEX; } - if (msl_version >= make_msl_version(3, 2)) { + if (msl_version >= MSL_VERSION_32) { // All 3.2+ versions support device coherence, so we can disable texture fences. msl_options.readwrite_texture_fences = false; } @@ -375,309 +320,281 @@ options.emit_line_directives = true; #endif - for (uint32_t i = 0; i < p_spirv.size(); i++) { - StageData &stage_data = mtl_shaders.write[i]; - const ReflectedShaderStage &v = p_spirv[i]; - RD::ShaderStage stage = v.shader_stage; - char const *stage_name = RD::SHADER_STAGE_NAMES[stage]; - Span spirv = v.spirv(); - Parser parser(spirv.ptr(), spirv.size()); - try { - parser.parse(); - } catch (CompilerError &e) { - ERR_FAIL_V_MSG(false, "Failed to parse IR at stage " + String(RD::SHADER_STAGE_NAMES[stage]) + ": " + e.what()); - } - - CompilerMSL compiler(std::move(parser.get_parsed_ir())); - compiler.set_msl_options(msl_options); - compiler.set_common_options(options); - - std::unordered_set active = compiler.get_active_interface_variables(); - ShaderResources resources = compiler.get_shader_resources(); - - std::string source; - try { - source = compiler.compile(); - } catch (CompilerError &e) { - ERR_FAIL_V_MSG(false, "Failed to compile stage " + String(RD::SHADER_STAGE_NAMES[stage]) + ": " + e.what()); - } - - ERR_FAIL_COND_V_MSG(compiler.get_entry_points_and_stages().size() != 1, false, "Expected a single entry point and stage."); - - SmallVector entry_pts_stages = compiler.get_entry_points_and_stages(); - EntryPoint &entry_point_stage = entry_pts_stages.front(); - SPIREntryPoint &entry_point = compiler.get_entry_point(entry_point_stage.name, entry_point_stage.execution_model); - - // Process specialization constants. 
- if (!compiler.get_specialization_constants().empty()) { - uint32_t size = reflection_specialization_data.size(); - for (SpecializationConstant const &constant : compiler.get_specialization_constants()) { - uint32_t j = 0; - while (j < size) { - const ReflectionSpecializationData &res = reflection_specialization_data.ptr()[j]; - if (res.constant_id == constant.constant_id) { - mtl_reflection_specialization_data.ptrw()[j].used_stages |= 1 << stage; - // emulate labeled for loop and continue - goto outer_continue; - } - ++j; - } - if (j == size) { - WARN_PRINT(String(stage_name) + ": unable to find constant_id: " + itos(constant.constant_id)); - } - outer_continue:; - } - } - - // Process bindings. - uint32_t uniform_sets_size = reflection_binding_set_uniforms_count.size(); - using BT = SPIRType::BaseType; - - // Always clearer than a boolean. - enum class Writable { - No, - Maybe, + // Assign MSL bindings for all the descriptor sets. + typedef std::pair MSLBindingInfo; + LocalVector spirv_bindings; + MSLResourceBinding push_constant_resource_binding; + { + enum IndexType { + Texture, + Buffer, + Sampler, + Max, }; - // Returns a std::optional containing the value of the - // decoration, if it exists. - auto get_decoration = [&compiler](spirv_cross::ID id, spv::Decoration decoration) { - uint32_t res = -1; - if (compiler.has_decoration(id, decoration)) { - res = compiler.get_decoration(id, decoration); - } - return res; + uint32_t dset_count = p_shader.uniform_sets.size(); + uint32_t size = reflection_binding_set_uniforms_data.size(); + spirv_bindings.resize(size); + + uint32_t indices[IndexType::Max] = { 0 }; + auto next_index = [&indices](IndexType p_t, uint32_t p_stride) -> uint32_t { + uint32_t v = indices[p_t]; + indices[p_t] += p_stride; + return v; }; - auto descriptor_bindings = [&compiler, &active, this, &set_indexes, uniform_sets_size, stage, &get_decoration](SmallVector &p_resources, Writable p_writable) { - for (Resource const &res : p_resources) { - uint32_t dset = get_decoration(res.id, spv::DecorationDescriptorSet); - uint32_t dbin = get_decoration(res.id, spv::DecorationBinding); - UniformData *found = nullptr; - if (dset != (uint32_t)-1 && dbin != (uint32_t)-1 && dset < uniform_sets_size) { - uint32_t begin = set_indexes[dset]; - uint32_t end = set_indexes[dset + 1]; - for (uint32_t j = begin; j < end; j++) { - const ReflectionBindingData &ref_bind = reflection_binding_set_uniforms_data[j]; - if (dbin == ref_bind.binding) { - found = &mtl_reflection_binding_set_uniforms_data.write[j]; + uint32_t idx_dset = 0; + MSLBindingInfo *iter = spirv_bindings.ptr(); + UniformData *found = mtl_reflection_binding_set_uniforms_data.ptrw(); + UniformData::IndexType shader_index_type = msl_options.argument_buffers ? UniformData::IndexType::ARG : UniformData::IndexType::SLOT; + + for (const ReflectDescriptorSet &dset : p_shader.uniform_sets) { + // Reset the index count for each descriptor set, as this is an index into the argument table. + uint32_t next_arg_buffer_index = 0; + auto next_arg_index = [&next_arg_buffer_index](uint32_t p_stride) -> uint32_t { + uint32_t v = next_arg_buffer_index; + next_arg_buffer_index += p_stride; + return v; + }; + + for (const ReflectUniform &uniform : dset) { + const SpvReflectDescriptorBinding &binding = uniform.get_spv_reflect(); + + found->active_stages = uniform.stages; + + RD::UniformType type = RD::UniformType(uniform.type); + uint32_t binding_stride = 1; // If this is an array, stride will be the length of the array.
+ if (uniform.length > 1) { + switch (type) { + case RDC::UNIFORM_TYPE_UNIFORM_BUFFER_DYNAMIC: + case RDC::UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC: + case RDC::UNIFORM_TYPE_UNIFORM_BUFFER: + case RDC::UNIFORM_TYPE_STORAGE_BUFFER: + // A buffer's length is its size, in bytes, so there is no stride. break; - } + default: { + binding_stride = uniform.length; + found->array_length = uniform.length; + } break; } } - ERR_FAIL_NULL_V_MSG(found, ERR_CANT_CREATE, "UniformData not found"); + // Determine access type. + switch (binding.descriptor_type) { + case SPV_REFLECT_DESCRIPTOR_TYPE_STORAGE_IMAGE: { + if (!(binding.decoration_flags & SPV_REFLECT_DECORATION_NON_WRITABLE)) { + if (!(binding.decoration_flags & SPV_REFLECT_DECORATION_NON_READABLE)) { + found->access = MTLBindingAccessReadWrite; + } else { + found->access = MTLBindingAccessWriteOnly; + } + } + } break; + case SPV_REFLECT_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + case SPV_REFLECT_DESCRIPTOR_TYPE_STORAGE_BUFFER: { + if (!(binding.decoration_flags & SPV_REFLECT_DECORATION_NON_WRITABLE) && !(binding.block.decoration_flags & SPV_REFLECT_DECORATION_NON_WRITABLE)) { + if (!(binding.decoration_flags & SPV_REFLECT_DECORATION_NON_READABLE) && !(binding.block.decoration_flags & SPV_REFLECT_DECORATION_NON_READABLE)) { + found->access = MTLBindingAccessReadWrite; + } else { + found->access = MTLBindingAccessWriteOnly; + } + } + } break; + default: + break; + } - bool is_active = active.find(res.id) != active.end(); - if (is_active) { - found->active_stages |= 1 << stage; + switch (found->access) { + case MTLBindingAccessReadOnly: + found->usage = MTLResourceUsageRead; + break; + case MTLBindingAccessWriteOnly: + found->usage = MTLResourceUsageWrite; + break; + case MTLBindingAccessReadWrite: + found->usage = MTLResourceUsageRead | MTLResourceUsageWrite; + break; } - BindingInfoData &primary = found->get_binding_for_stage(stage); + iter->second = uniform.stages; + MSLResourceBinding &rb = iter->first; + rb.desc_set = idx_dset; + rb.binding = uniform.binding; + rb.count = binding_stride; - SPIRType const &a_type = compiler.get_type(res.type_id); - BT basetype = a_type.basetype; + switch (type) { + case RDC::UNIFORM_TYPE_SAMPLER: { + found->data_type = MTLDataTypeSampler; + found->get_indexes(UniformData::IndexType::SLOT).sampler = next_index(Sampler, binding_stride); + found->get_indexes(UniformData::IndexType::ARG).sampler = next_arg_index(binding_stride); - switch (basetype) { - case BT::Struct: { - primary.data_type = MTLDataTypePointer; - } break; + rb.basetype = SPIRType::BaseType::Sampler; - case BT::Image: - case BT::SampledImage: { - primary.data_type = MTLDataTypeTexture; } break; - - case BT::Sampler: { - primary.data_type = MTLDataTypeSampler; - primary.array_length = 1; - for (uint32_t const &a : a_type.array) { - primary.array_length *= a; - } + case RDC::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE: + case RDC::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE_BUFFER: { + found->data_type = MTLDataTypeTexture; + found->get_indexes(UniformData::IndexType::SLOT).texture = next_index(Texture, binding_stride); + found->get_indexes(UniformData::IndexType::SLOT).sampler = next_index(Sampler, binding_stride); + found->get_indexes(UniformData::IndexType::ARG).texture = next_arg_index(binding_stride); + found->get_indexes(UniformData::IndexType::ARG).sampler = next_arg_index(binding_stride); + rb.basetype = SPIRType::BaseType::SampledImage; } break; - - default: { - ERR_FAIL_V_MSG(ERR_CANT_CREATE, "Unexpected BaseType"); + case RDC::UNIFORM_TYPE_TEXTURE: + case
RDC::UNIFORM_TYPE_IMAGE: + case RDC::UNIFORM_TYPE_TEXTURE_BUFFER: { + found->data_type = MTLDataTypeTexture; + found->get_indexes(UniformData::IndexType::SLOT).texture = next_index(Texture, binding_stride); + found->get_indexes(UniformData::IndexType::ARG).texture = next_arg_index(binding_stride); + rb.basetype = SPIRType::BaseType::Image; + } break; + case RDC::UNIFORM_TYPE_IMAGE_BUFFER: + CRASH_NOW_MSG("Unimplemented!"); // TODO. + break; + case RDC::UNIFORM_TYPE_UNIFORM_BUFFER_DYNAMIC: + case RDC::UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC: + case RDC::UNIFORM_TYPE_UNIFORM_BUFFER: + case RDC::UNIFORM_TYPE_STORAGE_BUFFER: { + found->data_type = MTLDataTypePointer; + found->get_indexes(UniformData::IndexType::SLOT).buffer = next_index(Buffer, binding_stride); + found->get_indexes(UniformData::IndexType::ARG).buffer = next_arg_index(binding_stride); + rb.basetype = SPIRType::BaseType::Void; } break; + case RDC::UNIFORM_TYPE_INPUT_ATTACHMENT: { + found->data_type = MTLDataTypeTexture; + found->get_indexes(UniformData::IndexType::SLOT).texture = next_index(Texture, binding_stride); + found->get_indexes(UniformData::IndexType::ARG).texture = next_arg_index(binding_stride); + rb.basetype = SPIRType::BaseType::Image; } break; + case RDC::UNIFORM_TYPE_MAX: + default: + CRASH_NOW_MSG("Unreachable"); } - // Find array length of image. - if (basetype == BT::Image || basetype == BT::SampledImage) { - primary.array_length = 1; - for (uint32_t const &a : a_type.array) { - primary.array_length *= a; - } - primary.is_multisampled = a_type.image.ms; + // Specify the MSL resource bindings based on the binding mode used by the shader. + rb.msl_buffer = found->get_indexes(shader_index_type).buffer; + rb.msl_texture = found->get_indexes(shader_index_type).texture; + rb.msl_sampler = found->get_indexes(shader_index_type).sampler; - SPIRType::ImageType const &image = a_type.image; - primary.image_format = image.format; + if (found->data_type == MTLDataTypeTexture) { + const SpvReflectImageTraits &image = uniform.get_spv_reflect().image; switch (image.dim) { - case spv::Dim1D: { + case SpvDim1D: { if (image.arrayed) { - primary.texture_type = MTLTextureType1DArray; + found->texture_type = MTLTextureType1DArray; } else { - primary.texture_type = MTLTextureType1D; + found->texture_type = MTLTextureType1D; } } break; - case spv::DimSubpassData: { - [[fallthrough]]; - } - case spv::Dim2D: { + case SpvDimSubpassData: + case SpvDim2D: { if (image.arrayed && image.ms) { - primary.texture_type = MTLTextureType2DMultisampleArray; + found->texture_type = MTLTextureType2DMultisampleArray; } else if (image.arrayed) { - primary.texture_type = MTLTextureType2DArray; + found->texture_type = MTLTextureType2DArray; } else if (image.ms) { - primary.texture_type = MTLTextureType2DMultisample; + found->texture_type = MTLTextureType2DMultisample; } else { - primary.texture_type = MTLTextureType2D; + found->texture_type = MTLTextureType2D; } } break; - case spv::Dim3D: { - primary.texture_type = MTLTextureType3D; + case SpvDim3D: { + found->texture_type = MTLTextureType3D; } break; - case spv::DimCube: { + case SpvDimCube: { if (image.arrayed) { - primary.texture_type = MTLTextureTypeCube; + found->texture_type = MTLTextureTypeCubeArray; + } else { + found->texture_type = MTLTextureTypeCube; } } break; - case spv::DimRect: { + case SpvDimRect: { + // Ignored.
} break; - case spv::DimBuffer: { - // VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER - primary.texture_type = MTLTextureTypeTextureBuffer; + case SpvDimBuffer: { + found->texture_type = MTLTextureTypeTextureBuffer; } break; - case spv::DimTileImageDataEXT: { + case SpvDimTileImageDataEXT: { // Godot does not use this extension. // See: https://registry.khronos.org/vulkan/specs/latest/man/html/VK_EXT_shader_tile_image.html } break; - case spv::DimMax: { + case SpvDimMax: { // Add all enumerations to silence the compiler warning // and generate future warnings, should a new one be added. } break; } } - // Update writable. - if (p_writable == Writable::Maybe) { - if (basetype == BT::Struct) { - Bitset flags = compiler.get_buffer_block_flags(res.id); - if (!flags.get(spv::DecorationNonWritable)) { - if (flags.get(spv::DecorationNonReadable)) { - primary.access = MTLBindingAccessWriteOnly; - } else { - primary.access = MTLBindingAccessReadWrite; - } - } - } else if (basetype == BT::Image) { - switch (a_type.image.access) { - case spv::AccessQualifierWriteOnly: - primary.access = MTLBindingAccessWriteOnly; - break; - case spv::AccessQualifierReadWrite: - primary.access = MTLBindingAccessReadWrite; - break; - case spv::AccessQualifierReadOnly: - break; - case spv::AccessQualifierMax: - [[fallthrough]]; - default: - if (!compiler.has_decoration(res.id, spv::DecorationNonWritable)) { - if (compiler.has_decoration(res.id, spv::DecorationNonReadable)) { - primary.access = MTLBindingAccessWriteOnly; - } else { - primary.access = MTLBindingAccessReadWrite; - } - } - break; - } - } - } - - switch (primary.access) { - case MTLBindingAccessReadOnly: - primary.usage = MTLResourceUsageRead; - break; - case MTLBindingAccessWriteOnly: - primary.usage = MTLResourceUsageWrite; - break; - case MTLBindingAccessReadWrite: - primary.usage = MTLResourceUsageRead | MTLResourceUsageWrite; - break; - } - - primary.index = compiler.get_automatic_msl_resource_binding(res.id); - - // A sampled image contains two bindings, the primary - // is to the image, and the secondary is to the associated sampler. - if (basetype == BT::SampledImage) { - uint32_t binding = compiler.get_automatic_msl_resource_binding_secondary(res.id); - if (binding != (uint32_t)-1) { - BindingInfoData &secondary = found->get_secondary_binding_for_stage(stage); - secondary.data_type = MTLDataTypeSampler; - secondary.index = binding; - secondary.access = MTLBindingAccessReadOnly; - } - } - - // An image may have a secondary binding if it is used - // for atomic operations. 
- if (basetype == BT::Image) { - uint32_t binding = compiler.get_automatic_msl_resource_binding_secondary(res.id); - if (binding != (uint32_t)-1) { - BindingInfoData &secondary = found->get_secondary_binding_for_stage(stage); - secondary.data_type = MTLDataTypePointer; - secondary.index = binding; - secondary.access = MTLBindingAccessReadWrite; - } - } + iter++; + found++; } - return Error::OK; - }; - - if (!resources.uniform_buffers.empty()) { - Error err = descriptor_bindings(resources.uniform_buffers, Writable::No); - ERR_FAIL_COND_V(err != OK, false); - } - if (!resources.storage_buffers.empty()) { - Error err = descriptor_bindings(resources.storage_buffers, Writable::Maybe); - ERR_FAIL_COND_V(err != OK, false); + idx_dset++; } - if (!resources.storage_images.empty()) { - Error err = descriptor_bindings(resources.storage_images, Writable::Maybe); - ERR_FAIL_COND_V(err != OK, false); + + if (reflection_data.push_constant_size > 0) { + push_constant_resource_binding.desc_set = ResourceBindingPushConstantDescriptorSet; + push_constant_resource_binding.basetype = SPIRType::BaseType::Void; + if (msl_options.argument_buffers) { + push_constant_resource_binding.msl_buffer = dset_count; + } else { + push_constant_resource_binding.msl_buffer = next_index(Buffer, 1); + } + mtl_reflection_data.push_constant_binding = push_constant_resource_binding.msl_buffer; } - if (!resources.sampled_images.empty()) { - Error err = descriptor_bindings(resources.sampled_images, Writable::No); - ERR_FAIL_COND_V(err != OK, false); + } + + for (uint32_t i = 0; i < p_spirv.size(); i++) { + StageData &stage_data = mtl_shaders.write[i]; + const ReflectShaderStage &v = p_spirv[i]; + RD::ShaderStage stage = v.shader_stage; + Span spirv = v.spirv(); + Parser parser(spirv.ptr(), spirv.size()); + try { + parser.parse(); + } catch (CompilerError &e) { + ERR_FAIL_V_MSG(false, "Failed to parse IR at stage " + String(RD::SHADER_STAGE_NAMES[stage]) + ": " + e.what()); } - if (!resources.separate_images.empty()) { - Error err = descriptor_bindings(resources.separate_images, Writable::No); - ERR_FAIL_COND_V(err != OK, false); + + CompilerMSL compiler(std::move(parser.get_parsed_ir())); + compiler.set_msl_options(msl_options); + compiler.set_common_options(options); + + spv::ExecutionModel execution_model = map_stage(stage); + for (uint32_t jj = 0; jj < spirv_bindings.size(); jj++) { + MSLResourceBinding &rb = spirv_bindings.ptr()[jj].first; + rb.stage = execution_model; + compiler.add_msl_resource_binding(rb); } - if (!resources.separate_samplers.empty()) { - Error err = descriptor_bindings(resources.separate_samplers, Writable::No); - ERR_FAIL_COND_V(err != OK, false); + + if (push_constant_resource_binding.desc_set == ResourceBindingPushConstantDescriptorSet) { + push_constant_resource_binding.stage = execution_model; + compiler.add_msl_resource_binding(push_constant_resource_binding); } - if (!resources.subpass_inputs.empty()) { - Error err = descriptor_bindings(resources.subpass_inputs, Writable::No); - ERR_FAIL_COND_V(err != OK, false); + + std::unordered_set active = compiler.get_active_interface_variables(); + ShaderResources resources = compiler.get_shader_resources(); + + std::string source; + try { + source = compiler.compile(); + } catch (CompilerError &e) { + ERR_FAIL_V_MSG(false, "Failed to compile stage " + String(RD::SHADER_STAGE_NAMES[stage]) + ": " + e.what()); } - if (!resources.push_constant_buffers.empty()) { - for (Resource const &res : resources.push_constant_buffers) { - uint32_t binding = 
compiler.get_automatic_msl_resource_binding(res.id); - if (binding != (uint32_t)-1) { - stage_data.push_constant_binding = binding; - } + ERR_FAIL_COND_V_MSG(compiler.get_entry_points_and_stages().size() != 1, false, "Expected a single entry point and stage."); + + SmallVector entry_pts_stages = compiler.get_entry_points_and_stages(); + EntryPoint &entry_point_stage = entry_pts_stages.front(); + SPIREntryPoint &entry_point = compiler.get_entry_point(entry_point_stage.name, entry_point_stage.execution_model); + + for (auto ext : compiler.get_declared_extensions()) { + if (ext == "SPV_KHR_non_semantic_info" || ext == "SPV_KHR_printf") { + mtl_reflection_data.set_needs_debug_logging(true); + break; } } - ERR_FAIL_COND_V_MSG(!resources.atomic_counters.empty(), false, "Atomic counters not supported"); - ERR_FAIL_COND_V_MSG(!resources.acceleration_structures.empty(), false, "Acceleration structures not supported"); - ERR_FAIL_COND_V_MSG(!resources.shader_record_buffers.empty(), false, "Shader record buffers not supported"); - if (!resources.stage_inputs.empty()) { for (Resource const &res : resources.stage_inputs) { uint32_t binding = compiler.get_automatic_msl_resource_binding(res.id); @@ -744,13 +661,6 @@ return sizeof(UniformData); } -uint32_t RenderingShaderContainerMetal::_to_bytes_reflection_specialization_extra_data(uint8_t *p_bytes, uint32_t p_index) const { - if (p_bytes != nullptr) { - *(SpecializationData *)p_bytes = mtl_reflection_specialization_data[p_index]; - } - return sizeof(SpecializationData); -} - uint32_t RenderingShaderContainerMetal::_to_bytes_shader_extra_data(uint8_t *p_bytes, uint32_t p_index) const { if (p_bytes != nullptr) { *(StageData *)p_bytes = mtl_shaders[p_index]; @@ -773,16 +683,6 @@ return sizeof(UniformData); } -uint32_t RenderingShaderContainerMetal::_from_bytes_reflection_specialization_extra_data_start(const uint8_t *p_bytes) { - mtl_reflection_specialization_data.resize(reflection_specialization_data.size()); - return 0; -} - -uint32_t RenderingShaderContainerMetal::_from_bytes_reflection_specialization_extra_data(const uint8_t *p_bytes, uint32_t p_index) { - mtl_reflection_specialization_data.ptrw()[p_index] = *(SpecializationData *)p_bytes; - return sizeof(SpecializationData); -} - uint32_t RenderingShaderContainerMetal::_from_bytes_shader_extra_data_start(const uint8_t *p_bytes) { mtl_shaders.resize(shaders.size()); return 0; @@ -796,7 +696,6 @@ RenderingShaderContainerMetal::MetalShaderReflection RenderingShaderContainerMetal::get_metal_shader_reflection() const { MetalShaderReflection res; - res.specialization_constants = mtl_reflection_specialization_data; uint32_t uniform_set_count = reflection_binding_set_uniforms_count.size(); uint32_t start = 0; res.uniform_sets.resize(uniform_set_count); @@ -824,7 +723,6 @@ result.instantiate(); result->set_export_mode(export_mode); result->set_device_profile(device_profile); - result->set_min_os_version(min_os_version); return result; } @@ -836,8 +734,8 @@ return SHADER_SPIRV_VERSION_1_6; } -RenderingShaderContainerFormatMetal::RenderingShaderContainerFormatMetal(const MetalDeviceProfile *p_device_profile, bool p_export, const MinOsVersion p_min_os_version) : - export_mode(p_export), min_os_version(p_min_os_version), device_profile(p_device_profile) { +RenderingShaderContainerFormatMetal::RenderingShaderContainerFormatMetal(const MetalDeviceProfile *p_device_profile, bool p_export) : + export_mode(p_export), device_profile(p_device_profile) { } String MinOsVersion::to_compiler_os_version() const { diff --git 
a/drivers/metal/sha256_digest.h b/drivers/metal/sha256_digest.h index 28a7a5733fe6..6b477c959da5 100644 --- a/drivers/metal/sha256_digest.h +++ b/drivers/metal/sha256_digest.h @@ -34,6 +34,7 @@ #import #import +#include "core/templates/hashfuncs.h" #include "core/templates/local_vector.h" struct SHA256Digest { @@ -73,3 +74,10 @@ struct SHA256Digest { return SHA256Digest((const char *)p_ser.ptr()); } }; + +template <> +struct HashMapComparatorDefault { + static bool compare(const SHA256Digest &p_lhs, const SHA256Digest &p_rhs) { + return memcmp(p_lhs.data, p_rhs.data, CC_SHA256_DIGEST_LENGTH) == 0; + } +}; diff --git a/drivers/vulkan/rendering_shader_container_vulkan.cpp b/drivers/vulkan/rendering_shader_container_vulkan.cpp index 133b3fba304a..7905dfc18af6 100644 --- a/drivers/vulkan/rendering_shader_container_vulkan.cpp +++ b/drivers/vulkan/rendering_shader_container_vulkan.cpp @@ -44,7 +44,9 @@ uint32_t RenderingShaderContainerVulkan::_format_version() const { return FORMAT_VERSION; } -bool RenderingShaderContainerVulkan::_set_code_from_spirv(Span p_spirv) { +bool RenderingShaderContainerVulkan::_set_code_from_spirv(const ReflectShader &p_shader) { + const LocalVector &p_spirv = p_shader.shader_stages; + PackedByteArray code_bytes; shaders.resize(p_spirv.size()); for (uint64_t i = 0; i < p_spirv.size(); i++) { diff --git a/drivers/vulkan/rendering_shader_container_vulkan.h b/drivers/vulkan/rendering_shader_container_vulkan.h index cc22cbde8ce3..34432976387b 100644 --- a/drivers/vulkan/rendering_shader_container_vulkan.h +++ b/drivers/vulkan/rendering_shader_container_vulkan.h @@ -47,7 +47,7 @@ class RenderingShaderContainerVulkan : public RenderingShaderContainer { protected: virtual uint32_t _format() const override; virtual uint32_t _format_version() const override; - virtual bool _set_code_from_spirv(Span p_spirv) override; + virtual bool _set_code_from_spirv(const ReflectShader &p_shader) override; public: RenderingShaderContainerVulkan(bool p_debug_info_enabled); diff --git a/editor/shader/shader_baker/shader_baker_export_plugin_platform_metal.cpp b/editor/shader/shader_baker/shader_baker_export_plugin_platform_metal.cpp index d92c661e2ae8..8570b5f164ad 100644 --- a/editor/shader/shader_baker/shader_baker_export_plugin_platform_metal.cpp +++ b/editor/shader/shader_baker/shader_baker_export_plugin_platform_metal.cpp @@ -35,19 +35,23 @@ RenderingShaderContainerFormat *ShaderBakerExportPluginPlatformMetal::create_shader_container_format(const Ref &p_platform, const Ref &p_preset) { const String &os_name = p_platform->get_os_name(); const MetalDeviceProfile *profile; - String min_os_version; + MinOsVersion min_os_version; if (os_name == U"macOS") { - profile = MetalDeviceProfile::get_profile(MetalDeviceProfile::Platform::macOS, MetalDeviceProfile::GPU::Apple7); + min_os_version = (String)p_preset->get("application/min_macos_version_arm64"); // Godot metal doesn't support x86_64 mac so no need to worry about that version - min_os_version = p_preset->get("application/min_macos_version_arm64"); + profile = MetalDeviceProfile::get_profile(MetalDeviceProfile::Platform::macOS, MetalDeviceProfile::GPU::Apple7, min_os_version); } else if (os_name == U"iOS") { - profile = MetalDeviceProfile::get_profile(MetalDeviceProfile::Platform::iOS, MetalDeviceProfile::GPU::Apple7); - min_os_version = p_preset->get("application/min_ios_version"); + min_os_version = (String)p_preset->get("application/min_ios_version"); + profile = MetalDeviceProfile::get_profile(MetalDeviceProfile::Platform::iOS, 
MetalDeviceProfile::GPU::Apple7, min_os_version); + } else if (os_name == U"visionOS") { + min_os_version = (String)p_preset->get("application/min_visionos_version"); + profile = MetalDeviceProfile::get_profile(MetalDeviceProfile::Platform::visionOS, MetalDeviceProfile::GPU::Apple8, min_os_version); } else { ERR_FAIL_V_MSG(nullptr, vformat("Unsupported platform: %s", os_name)); } - return memnew(RenderingShaderContainerFormatMetal(profile, true, min_os_version)); + ERR_FAIL_NULL_V(profile, nullptr); + return memnew(RenderingShaderContainerFormatMetal(profile, true)); } bool ShaderBakerExportPluginPlatformMetal::matches_driver(const String &p_driver) { diff --git a/servers/rendering/rendering_shader_container.cpp b/servers/rendering/rendering_shader_container.cpp index b108568d1a9f..dab68d7b7143 100644 --- a/servers/rendering/rendering_shader_container.cpp +++ b/servers/rendering/rendering_shader_container.cpp @@ -43,20 +43,35 @@ static inline uint32_t aligned_to(uint32_t p_size, uint32_t p_alignment) { } } -RenderingShaderContainer::ReflectedShaderStage::ReflectedShaderStage() : - _module(memnew(SpvReflectShaderModule)) { +template +const T &RenderingShaderContainer::ReflectSymbol::get_spv_reflect(RDC::ShaderStage p_stage) const { + const T *info = _spv_reflect[get_index_for_stage(p_stage)]; + DEV_ASSERT(info != nullptr); // Caller is expected to specify valid shader stages + return *info; } -RenderingShaderContainer::ReflectedShaderStage::~ReflectedShaderStage() { +template +void RenderingShaderContainer::ReflectSymbol::set_spv_reflect(RDC::ShaderStage p_stage, const T *p_spv) { + stages.set_flag(1 << p_stage); + _spv_reflect[get_index_for_stage(p_stage)] = p_spv; +} + +RenderingShaderContainer::ReflectShaderStage::ReflectShaderStage() { + _module = memnew(SpvReflectShaderModule); + memset(_module, 0, sizeof(SpvReflectShaderModule)); +} + +RenderingShaderContainer::ReflectShaderStage::~ReflectShaderStage() { spvReflectDestroyShaderModule(_module); memdelete(_module); + _module = nullptr; } -const SpvReflectShaderModule &RenderingShaderContainer::ReflectedShaderStage::module() const { +const SpvReflectShaderModule &RenderingShaderContainer::ReflectShaderStage::module() const { return *_module; } -const Span RenderingShaderContainer::ReflectedShaderStage::spirv() const { +const Span RenderingShaderContainer::ReflectShaderStage::spirv() const { return _spirv_data.span().reinterpret(); } @@ -120,34 +135,128 @@ uint32_t RenderingShaderContainer::_to_bytes_footer_extra_data(uint8_t *) const return 0; } -void RenderingShaderContainer::_set_from_shader_reflection_post(const RenderingDeviceCommons::ShaderReflection &p_reflection) { +void RenderingShaderContainer::_set_from_shader_reflection_post(const ReflectShader &p_shader) { // Do nothing. 
} -Error RenderingShaderContainer::reflect_spirv(const String &p_shader_name, Span p_spirv, LocalVector &r_refl) { +static RenderingDeviceCommons::DataFormat spv_image_format_to_data_format(const SpvImageFormat p_format) { using RDC = RenderingDeviceCommons; - RDC::ShaderReflection reflection; + switch (p_format) { + case SpvImageFormatUnknown: + return RDC::DATA_FORMAT_MAX; + case SpvImageFormatRgba32f: + return RDC::DATA_FORMAT_R32G32B32A32_SFLOAT; + case SpvImageFormatRgba16f: + return RDC::DATA_FORMAT_R16G16B16A16_SFLOAT; + case SpvImageFormatR32f: + return RDC::DATA_FORMAT_R32_SFLOAT; + case SpvImageFormatRgba8: + return RDC::DATA_FORMAT_R8G8B8A8_UNORM; + case SpvImageFormatRgba8Snorm: + return RDC::DATA_FORMAT_R8G8B8A8_SNORM; + case SpvImageFormatRg32f: + return RDC::DATA_FORMAT_R32G32_SFLOAT; + case SpvImageFormatRg16f: + return RDC::DATA_FORMAT_R16G16_SFLOAT; + case SpvImageFormatR11fG11fB10f: + return RDC::DATA_FORMAT_B10G11R11_UFLOAT_PACK32; + case SpvImageFormatR16f: + return RDC::DATA_FORMAT_R16_SFLOAT; + case SpvImageFormatRgba16: + return RDC::DATA_FORMAT_R16G16B16A16_UNORM; + case SpvImageFormatRgb10A2: + return RDC::DATA_FORMAT_A2B10G10R10_UNORM_PACK32; + case SpvImageFormatRg16: + return RDC::DATA_FORMAT_R16G16_UNORM; + case SpvImageFormatRg8: + return RDC::DATA_FORMAT_R8G8_UNORM; + case SpvImageFormatR16: + return RDC::DATA_FORMAT_R16_UNORM; + case SpvImageFormatR8: + return RDC::DATA_FORMAT_R8_UNORM; + case SpvImageFormatRgba16Snorm: + return RDC::DATA_FORMAT_R16G16B16A16_SNORM; + case SpvImageFormatRg16Snorm: + return RDC::DATA_FORMAT_R16G16_SNORM; + case SpvImageFormatRg8Snorm: + return RDC::DATA_FORMAT_R8G8_SNORM; + case SpvImageFormatR16Snorm: + return RDC::DATA_FORMAT_R16_SNORM; + case SpvImageFormatR8Snorm: + return RDC::DATA_FORMAT_R8_SNORM; + case SpvImageFormatRgba32i: + return RDC::DATA_FORMAT_R32G32B32A32_SINT; + case SpvImageFormatRgba16i: + return RDC::DATA_FORMAT_R16G16B16A16_SINT; + case SpvImageFormatRgba8i: + return RDC::DATA_FORMAT_R8G8B8A8_SINT; + case SpvImageFormatR32i: + return RDC::DATA_FORMAT_R32_SINT; + case SpvImageFormatRg32i: + return RDC::DATA_FORMAT_R32G32_SINT; + case SpvImageFormatRg16i: + return RDC::DATA_FORMAT_R16G16_SINT; + case SpvImageFormatRg8i: + return RDC::DATA_FORMAT_R8G8_SINT; + case SpvImageFormatR16i: + return RDC::DATA_FORMAT_R16_SINT; + case SpvImageFormatR8i: + return RDC::DATA_FORMAT_R8_SINT; + case SpvImageFormatRgba32ui: + return RDC::DATA_FORMAT_R32G32B32A32_UINT; + case SpvImageFormatRgba16ui: + return RDC::DATA_FORMAT_R16G16B16A16_UINT; + case SpvImageFormatRgba8ui: + return RDC::DATA_FORMAT_R8G8B8A8_UINT; + case SpvImageFormatR32ui: + return RDC::DATA_FORMAT_R32_UINT; + case SpvImageFormatRgb10a2ui: + return RDC::DATA_FORMAT_A2B10G10R10_UINT_PACK32; + case SpvImageFormatRg32ui: + return RDC::DATA_FORMAT_R32G32_UINT; + case SpvImageFormatRg16ui: + return RDC::DATA_FORMAT_R16G16_UINT; + case SpvImageFormatRg8ui: + return RDC::DATA_FORMAT_R8G8_UINT; + case SpvImageFormatR16ui: + return RDC::DATA_FORMAT_R16_UINT; + case SpvImageFormatR8ui: + return RDC::DATA_FORMAT_R8_UINT; + case SpvImageFormatR64ui: + return RDC::DATA_FORMAT_R64_UINT; + case SpvImageFormatR64i: + return RDC::DATA_FORMAT_R64_SINT; + case SpvImageFormatMax: + return RDC::DATA_FORMAT_MAX; + } + return RDC::DATA_FORMAT_MAX; +} + +Error RenderingShaderContainer::reflect_spirv(const String &p_shader_name, Span p_spirv, ReflectShader &r_shader) { + ReflectShader &reflection = r_shader; shader_name = p_shader_name.utf8(); const uint32_t spirv_size = 
p_spirv.size() + 0; + + LocalVector &r_refl = r_shader.shader_stages; r_refl.resize(spirv_size); for (uint32_t i = 0; i < spirv_size; i++) { RDC::ShaderStage stage = p_spirv[i].shader_stage; - RDC::ShaderStage stage_flag = (RDC::ShaderStage)(1 << p_spirv[i].shader_stage); - r_refl[i].shader_stage = p_spirv[i].shader_stage; + RDC::ShaderStage stage_flag = (RDC::ShaderStage)(1 << stage); + r_refl[i].shader_stage = stage; r_refl[i]._spirv_data = p_spirv[i].spirv; const Vector &dynamic_buffers = p_spirv[i].dynamic_buffers; - if (p_spirv[i].shader_stage == RDC::SHADER_STAGE_COMPUTE) { - reflection.is_compute = true; + if (stage == RDC::SHADER_STAGE_COMPUTE) { ERR_FAIL_COND_V_MSG(spirv_size != 1, FAILED, "Compute shaders can only receive one stage, dedicated to compute."); } ERR_FAIL_COND_V_MSG(reflection.stages_bits.has_flag(stage_flag), FAILED, "Stage " + String(RDC::SHADER_STAGE_NAMES[p_spirv[i].shader_stage]) + " submitted more than once."); + reflection.stages_bits.set_flag(stage_flag); { SpvReflectShaderModule &module = *r_refl.ptr()[i]._module; @@ -163,7 +272,7 @@ Error RenderingShaderContainer::reflect_spirv(const String &p_shader_name, Span< } } - if (reflection.is_compute) { + if (reflection.is_compute()) { reflection.compute_local_size[0] = module.entry_points->local_size.x; reflection.compute_local_size[1] = module.entry_points->local_size.y; reflection.compute_local_size[2] = module.entry_points->local_size.z; @@ -186,11 +295,13 @@ Error RenderingShaderContainer::reflect_spirv(const String &p_shader_name, Span< for (uint32_t j = 0; j < binding_count; j++) { const SpvReflectDescriptorBinding &binding = *bindings[j]; - RDC::ShaderUniform uniform; + ReflectUniform uniform; + uniform.set_spv_reflect(stage, &binding); bool need_array_dimensions = false; bool need_block_size = false; bool may_be_writable = false; + bool is_image = false; switch (binding.descriptor_type) { case SPV_REFLECT_DESCRIPTOR_TYPE_SAMPLER: { @@ -200,24 +311,29 @@ Error RenderingShaderContainer::reflect_spirv(const String &p_shader_name, Span< case SPV_REFLECT_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: { uniform.type = RDC::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE; need_array_dimensions = true; + is_image = true; } break; case SPV_REFLECT_DESCRIPTOR_TYPE_SAMPLED_IMAGE: { uniform.type = RDC::UNIFORM_TYPE_TEXTURE; need_array_dimensions = true; + is_image = true; } break; case SPV_REFLECT_DESCRIPTOR_TYPE_STORAGE_IMAGE: { uniform.type = RDC::UNIFORM_TYPE_IMAGE; need_array_dimensions = true; may_be_writable = true; + is_image = true; } break; case SPV_REFLECT_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: { uniform.type = RDC::UNIFORM_TYPE_TEXTURE_BUFFER; need_array_dimensions = true; + is_image = true; } break; case SPV_REFLECT_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: { uniform.type = RDC::UNIFORM_TYPE_IMAGE_BUFFER; need_array_dimensions = true; may_be_writable = true; + is_image = true; } break; case SPV_REFLECT_DESCRIPTOR_TYPE_UNIFORM_BUFFER: { const uint64_t key = ShaderRD::DynamicBuffer::encode(binding.set, binding.binding); @@ -251,6 +367,7 @@ Error RenderingShaderContainer::reflect_spirv(const String &p_shader_name, Span< case SPV_REFLECT_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: { uniform.type = RDC::UNIFORM_TYPE_INPUT_ATTACHMENT; need_array_dimensions = true; + is_image = true; } break; case SPV_REFLECT_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR: { ERR_PRINT("Acceleration structure not supported."); @@ -259,18 +376,10 @@ Error RenderingShaderContainer::reflect_spirv(const String &p_shader_name, Span< } if (need_array_dimensions) { - if 
(binding.array.dims_count == 0) { - uniform.length = 1; - } else { - for (uint32_t k = 0; k < binding.array.dims_count; k++) { - if (k == 0) { - uniform.length = binding.array.dims[0]; - } else { - uniform.length *= binding.array.dims[k]; - } - } + uniform.length = 1; + for (uint32_t k = 0; k < binding.array.dims_count; k++) { + uniform.length *= binding.array.dims[k]; } - } else if (need_block_size) { uniform.length = binding.block.size; } else { @@ -287,6 +396,10 @@ Error RenderingShaderContainer::reflect_spirv(const String &p_shader_name, Span< uniform.writable = false; } + if (is_image) { + uniform.image.format = spv_image_format_to_data_format(binding.image.image_format); + } + uniform.binding = binding.binding; uint32_t set = binding.set; @@ -296,7 +409,7 @@ Error RenderingShaderContainer::reflect_spirv(const String &p_shader_name, Span< if (set < (uint32_t)reflection.uniform_sets.size()) { // Check if this already exists. bool exists = false; - for (int k = 0; k < reflection.uniform_sets[set].size(); k++) { + for (uint32_t k = 0; k < reflection.uniform_sets[set].size(); k++) { if (reflection.uniform_sets[set][k].binding == uniform.binding) { // Already exists, verify that it's the same type. ERR_FAIL_COND_V_MSG(reflection.uniform_sets[set][k].type != uniform.type, FAILED, @@ -311,7 +424,7 @@ Error RenderingShaderContainer::reflect_spirv(const String &p_shader_name, Span< "On shader stage '" + String(RDC::SHADER_STAGE_NAMES[stage]) + "', uniform '" + binding.name + "' trying to reuse location for set=" + itos(set) + ", binding=" + itos(uniform.binding) + " with different writability."); // Just append stage mask and return. - reflection.uniform_sets.write[set].write[k].stages.set_flag(stage_flag); + reflection.uniform_sets[set][k].stages.set_flag(stage_flag); exists = true; break; } @@ -328,7 +441,7 @@ Error RenderingShaderContainer::reflect_spirv(const String &p_shader_name, Span< reflection.uniform_sets.resize(set + 1); } - reflection.uniform_sets.write[set].push_back(uniform); + reflection.uniform_sets[set].push_back(uniform); } } @@ -350,8 +463,9 @@ Error RenderingShaderContainer::reflect_spirv(const String &p_shader_name, Span< for (uint32_t j = 0; j < sc_count; j++) { int32_t existing = -1; - RDC::ShaderSpecializationConstant sconst; + ReflectSpecializationConstant sconst; SpvReflectSpecializationConstant *spc = spec_constants[j]; + sconst.set_spv_reflect(stage, spc); sconst.constant_id = spc->constant_id; sconst.int_value = 0; // Clear previous value JIC. 
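The array-length rewrite in the hunk above is worth a second look: the removed branch special-cased dims_count == 0, while the new loop relies on the empty product being 1, so scalar bindings fall out of the same code path. A standalone illustration of that invariant (flattened_length is a hypothetical name; the logic mirrors the hunk):

// Flattened element count of a possibly multi-dimensional binding array.
// The empty product is 1, which covers non-array bindings with no branch.
static uint32_t flattened_length(const uint32_t *p_dims, uint32_t p_dims_count) {
	uint32_t length = 1;
	for (uint32_t k = 0; k < p_dims_count; k++) {
		length *= p_dims[k];
	}
	return length;
}
// e.g. `uniform sampler2D tex[4][3];` reflects dims { 4, 3 } -> length 12,
// while a non-array `uniform sampler2D tex;` has dims_count == 0 -> length 1.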
@@ -371,7 +485,7 @@ Error RenderingShaderContainer::reflect_spirv(const String &p_shader_name, Span< } sconst.stages.set_flag(stage_flag); - for (int k = 0; k < reflection.specialization_constants.size(); k++) { + for (uint32_t k = 0; k < reflection.specialization_constants.size(); k++) { if (reflection.specialization_constants[k].constant_id == sconst.constant_id) { ERR_FAIL_COND_V_MSG(reflection.specialization_constants[k].type != sconst.type, FAILED, "More than one specialization constant used for id (" + itos(sconst.constant_id) + "), but their types differ."); ERR_FAIL_COND_V_MSG(reflection.specialization_constants[k].int_value != sconst.int_value, FAILED, "More than one specialization constant used for id (" + itos(sconst.constant_id) + "), but their default values differ."); @@ -381,7 +495,7 @@ Error RenderingShaderContainer::reflect_spirv(const String &p_shader_name, Span< } if (existing >= 0) { - reflection.specialization_constants.write[existing].stages.set_flag(stage_flag); + reflection.specialization_constants[existing].stages.set_flag(stage_flag); } else { reflection.specialization_constants.push_back(sconst); } @@ -476,13 +590,11 @@ Error RenderingShaderContainer::reflect_spirv(const String &p_shader_name, Span< //print_line("Stage: " + String(RDC::SHADER_STAGE_NAMES[stage]) + " push constant of size=" + itos(push_constant.push_constant_size)); } } - - reflection.stages_bits.set_flag(stage_flag); } // Sort all uniform_sets by binding. for (uint32_t i = 0; i < reflection.uniform_sets.size(); i++) { - reflection.uniform_sets.write[i].sort(); + reflection.uniform_sets[i].sort(); } set_from_shader_reflection(reflection); @@ -490,7 +602,7 @@ Error RenderingShaderContainer::reflect_spirv(const String &p_shader_name, Span< return OK; } -void RenderingShaderContainer::set_from_shader_reflection(const RenderingDeviceCommons::ShaderReflection &p_reflection) { +void RenderingShaderContainer::set_from_shader_reflection(const ReflectShader &p_reflection) { reflection_binding_set_uniforms_count.clear(); reflection_binding_set_uniforms_data.clear(); reflection_specialization_data.clear(); @@ -499,7 +611,7 @@ void RenderingShaderContainer::set_from_shader_reflection(const RenderingDeviceC reflection_data.vertex_input_mask = p_reflection.vertex_input_mask; reflection_data.fragment_output_mask = p_reflection.fragment_output_mask; reflection_data.specialization_constants_count = p_reflection.specialization_constants.size(); - reflection_data.is_compute = p_reflection.is_compute; + reflection_data.is_compute = p_reflection.is_compute(); reflection_data.has_multiview = p_reflection.has_multiview; reflection_data.has_dynamic_buffers = p_reflection.has_dynamic_buffers; reflection_data.compute_local_size[0] = p_reflection.compute_local_size[0]; @@ -511,8 +623,8 @@ void RenderingShaderContainer::set_from_shader_reflection(const RenderingDeviceC reflection_data.shader_name_len = shader_name.length(); ReflectionBindingData binding_data; - for (const Vector &uniform_set : p_reflection.uniform_sets) { - for (const RenderingDeviceCommons::ShaderUniform &uniform : uniform_set) { + for (const ReflectDescriptorSet &uniform_set : p_reflection.uniform_sets) { + for (const ReflectUniform &uniform : uniform_set) { binding_data.type = uint32_t(uniform.type); binding_data.binding = uniform.binding; binding_data.stages = uint32_t(uniform.stages); @@ -525,7 +637,7 @@ void RenderingShaderContainer::set_from_shader_reflection(const RenderingDeviceC } ReflectionSpecializationData specialization_data; - for (const 
RenderingDeviceCommons::ShaderSpecializationConstant &spec : p_reflection.specialization_constants) { + for (const ReflectSpecializationConstant &spec : p_reflection.specialization_constants) { specialization_data.type = uint32_t(spec.type); specialization_data.constant_id = spec.constant_id; specialization_data.int_value = spec.int_value; @@ -533,9 +645,9 @@ void RenderingShaderContainer::set_from_shader_reflection(const RenderingDeviceC reflection_specialization_data.push_back(specialization_data); } - for (uint32_t i = 0; i < RenderingDeviceCommons::SHADER_STAGE_MAX; i++) { - if (p_reflection.stages_bits.has_flag(RenderingDeviceCommons::ShaderStage(1U << i))) { - reflection_shader_stages.push_back(RenderingDeviceCommons::ShaderStage(i)); + for (uint32_t i = 0; i < RDC::SHADER_STAGE_MAX; i++) { + if (p_reflection.stages_bits.has_flag(RDC::ShaderStage(1U << i))) { + reflection_shader_stages.push_back(RDC::ShaderStage(i)); } } @@ -544,14 +656,14 @@ void RenderingShaderContainer::set_from_shader_reflection(const RenderingDeviceC _set_from_shader_reflection_post(p_reflection); } -bool RenderingShaderContainer::set_code_from_spirv(const String &p_shader_name, Span p_spirv) { - LocalVector spirv; - ERR_FAIL_COND_V(reflect_spirv(p_shader_name, p_spirv, spirv) != OK, false); - return _set_code_from_spirv(spirv.span()); +bool RenderingShaderContainer::set_code_from_spirv(const String &p_shader_name, Span p_spirv) { + ReflectShader shader; + ERR_FAIL_COND_V(reflect_spirv(p_shader_name, p_spirv, shader) != OK, false); + return _set_code_from_spirv(shader); } RenderingDeviceCommons::ShaderReflection RenderingShaderContainer::get_shader_reflection() const { - RenderingDeviceCommons::ShaderReflection shader_refl; + RDC::ShaderReflection shader_refl; shader_refl.push_constant_size = reflection_data.push_constant_size; shader_refl.push_constant_stages = reflection_data.push_constant_stages_mask; shader_refl.vertex_input_mask = reflection_data.vertex_input_mask; @@ -569,13 +681,13 @@ RenderingDeviceCommons::ShaderReflection RenderingShaderContainer::get_shader_re DEV_ASSERT(reflection_binding_set_uniforms_count.size() == reflection_data.set_count && "The amount of elements in the reflection and the shader container can't be different."); uint32_t uniform_index = 0; for (uint32_t i = 0; i < reflection_data.set_count; i++) { - Vector &uniform_set = shader_refl.uniform_sets.ptrw()[i]; + Vector &uniform_set = shader_refl.uniform_sets.ptrw()[i]; uint32_t uniforms_count = reflection_binding_set_uniforms_count[i]; uniform_set.resize(uniforms_count); for (uint32_t j = 0; j < uniforms_count; j++) { const ReflectionBindingData &binding = reflection_binding_set_uniforms_data[uniform_index++]; - RenderingDeviceCommons::ShaderUniform &uniform = uniform_set.ptrw()[j]; - uniform.type = RenderingDeviceCommons::UniformType(binding.type); + RDC::ShaderUniform &uniform = uniform_set.ptrw()[j]; + uniform.type = RDC::UniformType(binding.type); uniform.writable = binding.writable; uniform.length = binding.length; uniform.binding = binding.binding; @@ -586,8 +698,8 @@ RenderingDeviceCommons::ShaderReflection RenderingShaderContainer::get_shader_re shader_refl.specialization_constants.resize(reflection_data.specialization_constants_count); for (uint32_t i = 0; i < reflection_data.specialization_constants_count; i++) { const ReflectionSpecializationData &spec = reflection_specialization_data[i]; - RenderingDeviceCommons::ShaderSpecializationConstant &sc = shader_refl.specialization_constants.ptrw()[i]; - sc.type = 
RenderingDeviceCommons::PipelineSpecializationConstantType(spec.type); + RDC::ShaderSpecializationConstant &sc = shader_refl.specialization_constants.ptrw()[i]; + sc.type = RDC::PipelineSpecializationConstantType(spec.type); sc.constant_id = spec.constant_id; sc.int_value = spec.int_value; sc.stages = spec.stage_flags; @@ -596,7 +708,7 @@ RenderingDeviceCommons::ShaderReflection RenderingShaderContainer::get_shader_re shader_refl.stages_vector.resize(reflection_data.stage_count); for (uint32_t i = 0; i < reflection_data.stage_count; i++) { shader_refl.stages_vector.set(i, reflection_shader_stages[i]); - shader_refl.stages_bits.set_flag(RenderingDeviceCommons::ShaderStage(1U << reflection_shader_stages[i])); + shader_refl.stages_bits.set_flag(RDC::ShaderStage(1U << reflection_shader_stages[i])); } return shader_refl; @@ -672,11 +784,11 @@ bool RenderingShaderContainer::from_bytes(const PackedByteArray &p_bytes) { const uint32_t stage_count = reflection_data.stage_count; if (stage_count > 0) { - ERR_FAIL_COND_V_MSG(int64_t(bytes_offset + stage_count * sizeof(RenderingDeviceCommons::ShaderStage)) > p_bytes.size(), false, "Not enough bytes for stages in shader container."); + ERR_FAIL_COND_V_MSG(int64_t(bytes_offset + stage_count * sizeof(RDC::ShaderStage)) > p_bytes.size(), false, "Not enough bytes for stages in shader container."); reflection_shader_stages.resize(stage_count); bytes_offset += _from_bytes_shader_extra_data_start(&bytes_ptr[bytes_offset]); - memcpy(reflection_shader_stages.ptrw(), &bytes_ptr[bytes_offset], stage_count * sizeof(RenderingDeviceCommons::ShaderStage)); - bytes_offset += stage_count * sizeof(RenderingDeviceCommons::ShaderStage); + memcpy(reflection_shader_stages.ptrw(), &bytes_ptr[bytes_offset], stage_count * sizeof(RDC::ShaderStage)); + bytes_offset += stage_count * sizeof(RDC::ShaderStage); } // Read shaders. 
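With reflection_shader_stages now declared through the RDC alias, the stage list remains a flat array of trivially copyable enums, so deserialization stays a bounds check plus a memcpy. A hedged sketch of that read pattern (read_stages is an illustrative helper, not part of the patch):

// Mirrors the bounds-checked memcpy used by from_bytes() for the stage list.
static bool read_stages(const uint8_t *p_bytes, int64_t p_bytes_size, uint64_t p_offset, uint32_t p_count, Vector<RenderingDeviceCommons::ShaderStage> &r_stages) {
	const uint64_t byte_count = uint64_t(p_count) * sizeof(RenderingDeviceCommons::ShaderStage);
	if (int64_t(p_offset + byte_count) > p_bytes_size) {
		return false; // Not enough bytes for stages in shader container.
	}
	r_stages.resize(p_count);
	memcpy(r_stages.ptrw(), &p_bytes[p_offset], byte_count);
	return true;
}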
@@ -687,7 +799,7 @@ bool RenderingShaderContainer::from_bytes(const PackedByteArray &p_bytes) { ERR_FAIL_COND_V_MSG(int64_t(bytes_offset + header.code_compressed_size) > p_bytes.size(), false, "Not enough bytes for a shader in shader container."); Shader &shader = shaders.ptrw()[i]; - shader.shader_stage = RenderingDeviceCommons::ShaderStage(header.shader_stage); + shader.shader_stage = RDC::ShaderStage(header.shader_stage); shader.code_compression_flags = header.code_compression_flags; shader.code_decompressed_size = header.code_decompressed_size; shader.code_compressed_bytes.resize(header.code_compressed_size); @@ -712,7 +824,7 @@ PackedByteArray RenderingShaderContainer::to_bytes() const { total_size += reflection_binding_set_uniforms_count.size() * sizeof(uint32_t); total_size += reflection_binding_set_uniforms_data.size() * sizeof(ReflectionBindingData); total_size += reflection_specialization_data.size() * sizeof(ReflectionSpecializationData); - total_size += reflection_shader_stages.size() * sizeof(RenderingDeviceCommons::ShaderStage); + total_size += reflection_shader_stages.size() * sizeof(RDC::ShaderStage); for (uint32_t i = 0; i < reflection_binding_set_uniforms_data.size(); i++) { total_size += _to_bytes_reflection_binding_uniform_extra_data(nullptr, i); @@ -777,8 +889,8 @@ PackedByteArray RenderingShaderContainer::to_bytes() const { if (!reflection_shader_stages.is_empty()) { uint32_t stage_count = reflection_shader_stages.size(); - memcpy(&bytes_ptr[bytes_offset], reflection_shader_stages.ptr(), stage_count * sizeof(RenderingDeviceCommons::ShaderStage)); - bytes_offset += stage_count * sizeof(RenderingDeviceCommons::ShaderStage); + memcpy(&bytes_ptr[bytes_offset], reflection_shader_stages.ptr(), stage_count * sizeof(RDC::ShaderStage)); + bytes_offset += stage_count * sizeof(RDC::ShaderStage); } for (uint32_t i = 0; i < shaders.size(); i++) { diff --git a/servers/rendering/rendering_shader_container.h b/servers/rendering/rendering_shader_container.h index 830d285206f6..364294d26935 100644 --- a/servers/rendering/rendering_shader_container.h +++ b/servers/rendering/rendering_shader_container.h @@ -34,6 +34,8 @@ #include "servers/rendering/rendering_device_commons.h" struct SpvReflectShaderModule; +struct SpvReflectDescriptorBinding; +struct SpvReflectSpecializationConstant; class RenderingShaderContainer : public RefCounted { GDSOFTCLASS(RenderingShaderContainer, RefCounted); @@ -43,6 +45,8 @@ class RenderingShaderContainer : public RefCounted { static const uint32_t CONTAINER_VERSION = 2; protected: + using RDC = RenderingDeviceCommons; + struct ContainerHeader { uint32_t magic_number = 0; uint32_t version = 0; @@ -96,7 +100,7 @@ class RenderingShaderContainer : public RefCounted { Vector reflection_binding_set_uniforms_count; Vector reflection_binding_set_uniforms_data; Vector reflection_specialization_data; - Vector reflection_shader_stages; + Vector reflection_shader_stages; virtual uint32_t _format() const = 0; virtual uint32_t _format_version() const = 0; @@ -120,30 +124,159 @@ class RenderingShaderContainer : public RefCounted { virtual uint32_t _to_bytes_shader_extra_data(uint8_t *p_bytes, uint32_t p_index) const; virtual uint32_t _to_bytes_footer_extra_data(uint8_t *p_bytes) const; - // This method will be called when set_from_shader_reflection() is finished. Used to update internal structures to match the reflection if necessary. 
-	virtual void _set_from_shader_reflection_post(const RenderingDeviceCommons::ShaderReflection &p_reflection);
+	template <typename T>
+	struct ReflectSymbol {
+		static constexpr uint32_t STAGE_INDEX[RDC::SHADER_STAGE_MAX] = {
+			0, // SHADER_STAGE_VERTEX
+			1, // SHADER_STAGE_FRAGMENT
+			0, // SHADER_STAGE_TESSELATION_CONTROL
+			1, // SHADER_STAGE_TESSELATION_EVALUATION
+			0, // SHADER_STAGE_COMPUTE
+		};
+
+		BitField<RDC::ShaderStage> stages = {};
+
+	private:
+		const T *_spv_reflect[2] = { nullptr };
+
+	public:
+		_FORCE_INLINE_ constexpr uint32_t get_index_for_stage(RDC::ShaderStage p_stage) const {
+			DEV_ASSERT(stages.has_flag((1 << p_stage)));
+			return STAGE_INDEX[p_stage];
+		}
+
+		const T &get_spv_reflect(RDC::ShaderStage p_stage) const;
+
+		/*! Returns the first valid stage if multiple stages are set.
+		 *
+		 * Crashes if no stages are set.
+		 */
+		const T &get_spv_reflect() const {
+			for (const T *d : _spv_reflect) {
+				if (d != nullptr) {
+					return *d;
+				}
+			}
+			CRASH_NOW_MSG("No stages set in ReflectSymbol");
+		}
+		void set_spv_reflect(RDC::ShaderStage p_stage, const T *p_spv);
+	};
+
+	struct ReflectImageTraits {
+		RDC::DataFormat format = RDC::DATA_FORMAT_MAX;
+	};
+
+	struct ReflectUniform : ReflectSymbol<SpvReflectDescriptorBinding> {
+		RDC::UniformType type = RDC::UniformType::UNIFORM_TYPE_MAX;
+		uint32_t binding = 0;
+
+		ReflectImageTraits image;
+
+		uint32_t length = 0; // Size of arrays (in total elements), or ubos (in bytes * total elements).
+		bool writable = false;
+
+		bool operator<(const ReflectUniform &p_other) const {
+			if (binding != p_other.binding) {
+				return binding < p_other.binding;
+			}
+			if (type != p_other.type) {
+				return type < p_other.type;
+			}
+			if (writable != p_other.writable) {
+				return writable < p_other.writable;
+			}
+			if (stages != p_other.stages) {
+				return stages < p_other.stages;
+			}
+			if (length != p_other.length) {
+				return length < p_other.length;
+			}
+			return false;
+		}
+	};

-	class ReflectedShaderStage {
+	struct ReflectSpecializationConstant : ReflectSymbol<SpvReflectSpecializationConstant> {
+		RDC::PipelineSpecializationConstantType type = {};
+		uint32_t constant_id = 0xffffffff;
+		union {
+			uint32_t int_value = 0;
+			float float_value;
+			bool bool_value;
+		};
+
+		bool operator<(const ReflectSpecializationConstant &p_other) const { return constant_id < p_other.constant_id; }
+	};
+
+	class ReflectShaderStage {
 		friend class RenderingShaderContainer;

 		Vector<uint8_t> _spirv_data;
 		SpvReflectShaderModule *_module = nullptr;

 	public:
-		RenderingDeviceCommons::ShaderStage shader_stage = RenderingDeviceCommons::SHADER_STAGE_MAX;
+		RDC::ShaderStage shader_stage = RDC::SHADER_STAGE_MAX;
 		const SpvReflectShaderModule &module() const;
 		const Span<uint32_t> spirv() const;
 		const Vector<uint8_t> spirv_data() const { return _spirv_data; }

-		ReflectedShaderStage();
-		~ReflectedShaderStage();
+		ReflectShaderStage();
+		~ReflectShaderStage();
 	};

+	typedef LocalVector<ReflectUniform> ReflectDescriptorSet;
+
+	struct ReflectShader {
+		uint64_t vertex_input_mask = 0;
+		uint32_t fragment_output_mask = 0;
+		uint32_t compute_local_size[3] = {};
+		uint32_t push_constant_size = 0;
+		bool has_multiview = false;
+		bool has_dynamic_buffers = false;
+
+		LocalVector<ReflectShaderStage> shader_stages;
+		LocalVector<ReflectDescriptorSet> uniform_sets;
+		LocalVector<LocalVector<const SpvReflectDescriptorBinding *>> reflect_uniforms;
+		LocalVector<ReflectSpecializationConstant> specialization_constants;
+		LocalVector<LocalVector<const SpvReflectSpecializationConstant *>> reflect_specialization_constants;
+		LocalVector<RDC::ShaderStage> stages_vector;
+		BitField<RDC::ShaderStage> stages_bits = {};
+		BitField<RDC::ShaderStage> push_constant_stages = {};
+
+		_FORCE_INLINE_ bool is_compute() const {
+			return stages_bits.has_flag(RDC::SHADER_STAGE_COMPUTE_BIT);
+		}
+
+		/*! Returns the uniform at the specified global index.
+		 *
+		 * This is a flattened view of all uniform sets.
+		 */
+		ReflectUniform &uniform_at(uint32_t p_index) {
+			for (LocalVector<ReflectUniform> &set : uniform_sets) {
+				if (p_index < set.size()) {
+					return set[p_index];
+				}
+				p_index -= set.size();
+			}
+			CRASH_NOW_MSG(vformat("Uniform index %d out of range (total %d)", p_index, uniform_count()));
+		}
+
+		uint32_t uniform_count() const {
+			uint32_t count = 0;
+			for (const LocalVector<ReflectUniform> &set : uniform_sets) {
+				count += set.size();
+			}
+			return count;
+		}
+	};
+
+	// This method will be called when set_from_shader_reflection() is finished. Used to update internal structures to match the reflection if necessary.
+	virtual void _set_from_shader_reflection_post(const ReflectShader &p_shader);
+
 	// This method will be called when set_code_from_spirv() is called.
-	virtual bool _set_code_from_spirv(Span<ReflectedShaderStage> p_spirv) = 0;
+	virtual bool _set_code_from_spirv(const ReflectShader &p_shader) = 0;

-	void set_from_shader_reflection(const RenderingDeviceCommons::ShaderReflection &p_reflection);
-	Error reflect_spirv(const String &p_shader_name, Span<RenderingDeviceCommons::ShaderStageSPIRVData> p_spirv, LocalVector<ReflectedShaderStage> &r_refl);
+	void set_from_shader_reflection(const ReflectShader &p_reflection);
+	Error reflect_spirv(const String &p_shader_name, Span<RDC::ShaderStageSPIRVData> p_spirv, ReflectShader &r_shader);

 public:
 	enum CompressionFlags {
@@ -151,7 +284,7 @@ class RenderingShaderContainer : public RefCounted {
 	};

 	struct Shader {
-		RenderingDeviceCommons::ShaderStage shader_stage = RenderingDeviceCommons::SHADER_STAGE_MAX;
+		RDC::ShaderStage shader_stage = RDC::SHADER_STAGE_MAX;
 		PackedByteArray code_compressed_bytes;
 		uint32_t code_compression_flags = 0;
 		uint32_t code_decompressed_size = 0;
@@ -160,8 +293,8 @@ class RenderingShaderContainer : public RefCounted {
 	CharString shader_name;
 	Vector<Shader> shaders;

-	bool set_code_from_spirv(const String &p_shader_name, Span<RenderingDeviceCommons::ShaderStageSPIRVData> p_spirv);
-	RenderingDeviceCommons::ShaderReflection get_shader_reflection() const;
+	bool set_code_from_spirv(const String &p_shader_name, Span<RDC::ShaderStageSPIRVData> p_spirv);
+	RDC::ShaderReflection get_shader_reflection() const;
 	bool from_bytes(const PackedByteArray &p_bytes);
 	PackedByteArray to_bytes() const;
 	bool compress_code(const uint8_t *p_decompressed_bytes, uint32_t p_decompressed_size, uint8_t *p_compressed_bytes, uint32_t *r_compressed_size, uint32_t *r_compressed_flags) const;
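// Editor's note, a hedged usage sketch rather than part of the patch, built only
// from the public API above: fill a container from SPIR-V, serialize it, and
// restore it elsewhere. `MyContainer` (some RenderingShaderContainer subclass)
// and `spirv_stages` (a span of per-stage SPIR-V data) are hypothetical.
//
//	Ref<MyContainer> container;
//	container.instantiate();
//	if (container->set_code_from_spirv("my_shader", spirv_stages)) {
//		PackedByteArray bytes = container->to_bytes();
//
//		Ref<MyContainer> restored;
//		restored.instantiate();
//		if (restored->from_bytes(bytes)) {
//			// Reflection data is rebuilt from the deserialized container.
//			RenderingDeviceCommons::ShaderReflection refl = restored->get_shader_reflection();
//		}
//	}
//
// The behavioral change in this patch: subclasses now implement
// _set_code_from_spirv(const ReflectShader &) and receive the shared
// SPIRV-Reflect results, instead of re-reflecting a raw SPIR-V span per backend.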