diff --git a/CHANGELOG.md b/CHANGELOG.md index e347053f4b..c05a026d49 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -41,6 +41,11 @@ Bottom level categories: ## Unreleased +### Bug Fixes + +#### Vulkan +- Fixed a variety of mesh shader SPIR-V writer issues from the original implementation. By @inner-daemons in [#8756](https://github.com/gfx-rs/wgpu/pull/8756) + ## v28.0.0 (2025-12-17) ### Major Changes diff --git a/examples/features/src/framework.rs b/examples/features/src/framework.rs index aef3ca6626..67803a3195 100644 --- a/examples/features/src/framework.rs +++ b/examples/features/src/framework.rs @@ -260,7 +260,8 @@ impl ExampleContext { async fn init_async(surface: &mut SurfaceWrapper, window: Arc) -> Self { log::info!("Initializing wgpu..."); - let instance_descriptor = wgpu::InstanceDescriptor::from_env_or_default(); + let mut instance_descriptor = wgpu::InstanceDescriptor::from_env_or_default(); + instance_descriptor.backend_options.dx12.shader_compiler = wgpu::Dx12Compiler::StaticDxc; let instance = wgpu::Instance::new(&instance_descriptor); surface.pre_adapter(&instance, window); let adapter = get_adapter_with_capabilities_or_from_env( diff --git a/examples/features/src/mesh_shader/mod.rs b/examples/features/src/mesh_shader/mod.rs index 20f069e9a8..70aacec190 100644 --- a/examples/features/src/mesh_shader/mod.rs +++ b/examples/features/src/mesh_shader/mod.rs @@ -1,9 +1,15 @@ // Same as in mesh shader tests fn compile_wgsl(device: &wgpu::Device) -> wgpu::ShaderModule { - device.create_shader_module(wgpu::ShaderModuleDescriptor { - label: None, - source: wgpu::ShaderSource::Wgsl(include_str!("shader.wgsl").into()), - }) + // Workgroup memory zero initialization can be expensive for mesh shaders + unsafe { + device.create_shader_module_trusted( + wgpu::ShaderModuleDescriptor { + label: None, + source: wgpu::ShaderSource::Wgsl(include_str!("shader.wgsl").into()), + }, + wgpu::ShaderRuntimeChecks::unchecked(), + ) + } } fn compile_hlsl(device: &wgpu::Device, entry: &str, stage_str: &str) -> wgpu::ShaderModule { let out_path = format!( diff --git a/examples/features/src/mesh_shader/shader.wgsl b/examples/features/src/mesh_shader/shader.wgsl index e0d03ea13d..78732a6841 100644 --- a/examples/features/src/mesh_shader/shader.wgsl +++ b/examples/features/src/mesh_shader/shader.wgsl @@ -33,12 +33,15 @@ var workgroupData: f32; @task @payload(taskPayload) -@workgroup_size(1) -fn ts_main() -> @builtin(mesh_task_size) vec3 { - workgroupData = 1.0; - taskPayload.colorMask = vec4(1.0, 1.0, 0.0, 1.0); - taskPayload.visible = true; - return vec3(1, 1, 1); +@workgroup_size(64) +fn ts_main(@builtin(local_invocation_id) thread_id: vec3) -> @builtin(mesh_task_size) vec3 { + if thread_id.x == 0 { + workgroupData = 1.0; + taskPayload.colorMask = vec4(1.0, 1.0, 0.0, 1.0); + taskPayload.visible = true; + return vec3(1, 1, 1); + } + return vec3(0, 0, 0); } struct MeshOutput { @@ -52,24 +55,27 @@ var mesh_output: MeshOutput; @mesh(mesh_output) @payload(taskPayload) -@workgroup_size(1) -fn ms_main() { - mesh_output.vertex_count = 3; - mesh_output.primitive_count = 1; - workgroupData = 2.0; +@workgroup_size(64) +fn ms_main(@builtin(local_invocation_id) thread_id: vec3) { + if thread_id.x == 0 { + mesh_output.vertex_count = 3; + mesh_output.primitive_count = 1; + workgroupData = 2.0; - mesh_output.vertices[0].position = positions[0]; - mesh_output.vertices[0].color = colors[0] * taskPayload.colorMask; + mesh_output.vertices[0].position = positions[0]; + mesh_output.vertices[0].color = colors[0] * taskPayload.colorMask; - mesh_output.vertices[1].position = positions[1]; - mesh_output.vertices[1].color = colors[1] * taskPayload.colorMask; + mesh_output.vertices[1].position = positions[1]; + mesh_output.vertices[1].color = colors[1] * taskPayload.colorMask; - mesh_output.vertices[2].position = positions[2]; - mesh_output.vertices[2].color = colors[2] * taskPayload.colorMask; + mesh_output.vertices[2].position = positions[2]; + mesh_output.vertices[2].color = colors[2] * taskPayload.colorMask; - mesh_output.primitives[0].indices = vec3(0, 1, 2); - mesh_output.primitives[0].cull = !taskPayload.visible; - mesh_output.primitives[0].colorMask = vec4(1.0, 0.0, 1.0, 1.0); + mesh_output.primitives[0].indices = vec3(0, 1, 2); + mesh_output.primitives[0].cull = !taskPayload.visible; + mesh_output.primitives[0].colorMask = vec4(1.0, 0.0, 1.0, 1.0); + return; + } } @fragment diff --git a/naga-test/src/lib.rs b/naga-test/src/lib.rs index 51db424ff7..90ec37f1cb 100644 --- a/naga-test/src/lib.rs +++ b/naga-test/src/lib.rs @@ -164,6 +164,11 @@ impl SpirvOutParameters { ray_query_initialization_tracking: true, debug_info, use_storage_input_output_16: self.use_storage_input_output_16, + // Choose + task_runtime_limits: Some(spv::TaskRuntimeLimits { + max_mesh_workgroups_per_dim: 256, + max_mesh_workgroups_total: 1024, + }), } } } diff --git a/naga/src/back/spv/block.rs b/naga/src/back/spv/block.rs index 485c967c28..c1b7a6d84d 100644 --- a/naga/src/back/spv/block.rs +++ b/naga/src/back/spv/block.rs @@ -221,12 +221,11 @@ impl Writer { ir_result: &crate::FunctionResult, result_members: &[ResultMember], body: &mut Vec, - task_payload: Option, ) -> Result { for (index, res_member) in result_members.iter().enumerate() { // This isn't a real builtin, and is handled elsewhere if res_member.built_in == Some(crate::BuiltIn::MeshTaskSize) { - continue; + return Ok(Instruction::return_value(value_id)); } let member_value_id = match ir_result.binding { Some(_) => value_id, @@ -258,13 +257,7 @@ impl Writer { _ => {} } } - self.try_write_entry_point_task_return( - value_id, - ir_result, - result_members, - body, - task_payload, - ) + Ok(Instruction::return_void()) } } @@ -3267,7 +3260,6 @@ impl BlockContext<'_> { self.ir_function.result.as_ref().unwrap(), &context.results, &mut block.body, - context.task_payload_variable_id, )?, None => Instruction::return_value(value_id), }; @@ -3275,18 +3267,7 @@ impl BlockContext<'_> { return Ok(BlockExitDisposition::Discarded); } Statement::Return { value: None } => { - if let Some(super::EntryPointContext { - mesh_state: Some(ref mesh_state), - .. - }) = self.function.entry_point_context - { - self.function.consume( - block, - Instruction::branch(mesh_state.entry_point_epilogue_id), - ); - } else { - self.function.consume(block, Instruction::return_void()); - } + self.function.consume(block, Instruction::return_void()); return Ok(BlockExitDisposition::Discarded); } Statement::Kill => { @@ -3746,16 +3727,6 @@ impl BlockContext<'_> { LoopContext::default(), debug_info, )?; - if let Some(super::EntryPointContext { - mesh_state: Some(ref mesh_state), - .. - }) = self.function.entry_point_context - { - let mut block = Block::new(mesh_state.entry_point_epilogue_id); - self.writer - .write_mesh_shader_return(mesh_state, &mut block)?; - self.function.consume(block, Instruction::return_void()); - } Ok(()) } diff --git a/naga/src/back/spv/mesh_shader.rs b/naga/src/back/spv/mesh_shader.rs index a026c763af..d0c08895aa 100644 --- a/naga/src/back/spv/mesh_shader.rs +++ b/naga/src/back/spv/mesh_shader.rs @@ -4,7 +4,7 @@ use spirv::Word; use crate::{ back::spv::{ helpers::BindingDecorations, writer::FunctionInterface, Block, EntryPointContext, Error, - Instruction, ResultMember, WriterFlags, + Instruction, WriterFlags, }, non_max_u32::NonMaxU32, Handle, @@ -36,15 +36,9 @@ pub struct MeshReturnInfo { /// All members of the output variable struct type out_members: Vec, /// Id of the input variable for local invocation id - local_invocation_index_id: Word, + local_invocation_index_var_id: Word, /// Total workgroup size (product) workgroup_size: u32, - /// Variable to be used later when saving the output as a loop index - loop_counter_vertices: Word, - /// Variable to be used later when saving the output as a loop index - loop_counter_primitives: Word, - /// The id of the label to jump to when `return` is called - pub entry_point_epilogue_id: Word, /// Vertex-specific info vertex_info: PerOutputTypeMeshReturnInfo, @@ -55,16 +49,6 @@ pub struct MeshReturnInfo { } impl super::Writer { - pub(super) fn require_mesh_shaders(&mut self) -> Result<(), Error> { - self.use_extension("SPV_EXT_mesh_shader"); - self.require_any("Mesh Shaders", &[spirv::Capability::MeshShadingEXT])?; - let lang_version = self.lang_version(); - if lang_version.0 <= 1 && lang_version.1 < 4 { - return Err(Error::SpirvVersionTooLow(1, 4)); - } - Ok(()) - } - /// Sets up an output variable that will handle part of the mesh shader output pub(super) fn write_mesh_return_global_variable( &mut self, @@ -88,7 +72,6 @@ impl super::Writer { iface: &mut FunctionInterface, local_invocation_index_id: Option, ir_module: &crate::Module, - prelude: &mut Block, ep_context: &mut EntryPointContext, ) -> Result<(), Error> { let Some(ref mesh_info) = iface.mesh_info else { @@ -146,7 +129,7 @@ impl super::Writer { _ => unreachable!(), }; // In the final return, we do a giant memcpy, for which this is helpful - let local_invocation_index_id = match local_invocation_index_id { + let local_invocation_index_var_id = match local_invocation_index_id { Some(a) => a, None => { let u32_id = self.get_u32_type_id(); @@ -166,48 +149,17 @@ impl super::Writer { .to_words(&mut self.logical_layout.annotations); iface.varying_ids.push(var); - let loaded_value = self.id_gen.next(); - prelude - .body - .push(Instruction::load(u32_id, loaded_value, var, None)); - loaded_value + var } }; - let u32_id = self.get_u32_type_id(); - // A general function variable that we guarantee to allow in the final return. It must be - // declared at the top of the function. Currently it is used in the memcpy part to keep - // track of the current index to copy. - let loop_counter_1 = self.id_gen.next(); - let loop_counter_2 = self.id_gen.next(); - prelude.body.insert( - 0, - Instruction::variable( - self.get_pointer_type_id(u32_id, spirv::StorageClass::Function), - loop_counter_1, - spirv::StorageClass::Function, - None, - ), - ); - prelude.body.insert( - 1, - Instruction::variable( - self.get_pointer_type_id(u32_id, spirv::StorageClass::Function), - loop_counter_2, - spirv::StorageClass::Function, - None, - ), - ); // This is the information that is passed to the function writer // so that it can write the final return logic let mut mesh_return_info = MeshReturnInfo { out_variable_id: self.global_variables[mesh_info.output_variable].var_id, out_members, - local_invocation_index_id, + local_invocation_index_var_id, workgroup_size: self .get_constant_scalar(crate::Literal::U32(iface.workgroup_size.iter().product())), - loop_counter_vertices: loop_counter_1, - loop_counter_primitives: loop_counter_2, - entry_point_epilogue_id: self.id_gen.next(), vertex_info: PerOutputTypeMeshReturnInfo { array_type_id: vertex_array_type_id, @@ -457,60 +409,33 @@ impl super::Writer { Ok(()) } - pub(super) fn try_write_entry_point_task_return( + pub(super) fn write_entry_point_task_return( &mut self, value_id: Word, - ir_result: &crate::FunctionResult, - result_members: &[ResultMember], body: &mut Vec, - task_payload: Option, + task_payload: Word, ) -> Result { // OpEmitMeshTasksEXT must be called right before exiting (after setting other // output variables if there are any) - for (index, res_member) in result_members.iter().enumerate() { - if res_member.built_in == Some(crate::BuiltIn::MeshTaskSize) { - self.write_control_barrier(crate::Barrier::WORK_GROUP, body); - // If its a function like `fn a() -> @builtin(...) vec3 ...` - // then just use the output value. If it's a struct, extract the - // value from the struct. - let member_value_id = match ir_result.binding { - Some(_) => value_id, - None => { - let member_value_id = self.id_gen.next(); - body.push(Instruction::composite_extract( - res_member.type_id, - member_value_id, - value_id, - &[index as Word], - )); - member_value_id - } - }; - // Extract the vec3 into 3 u32's - let values = [self.id_gen.next(), self.id_gen.next(), self.id_gen.next()]; - for (i, &value) in values.iter().enumerate() { - let instruction = Instruction::composite_extract( - self.get_u32_type_id(), - value, - member_value_id, - &[i as Word], - ); - body.push(instruction); - } - // TODO: make this guaranteed to be uniform - let mut instruction = Instruction::new(spirv::Op::EmitMeshTasksEXT); - for id in values { - instruction.add_operand(id); - } - // We have to include the task payload in our call - if let Some(task_payload) = task_payload { - instruction.add_operand(task_payload); - } - return Ok(instruction); - } + // Extract the vec3 into 3 u32's + let values = [self.id_gen.next(), self.id_gen.next(), self.id_gen.next()]; + for (i, &value) in values.iter().enumerate() { + let instruction = Instruction::composite_extract( + self.get_u32_type_id(), + value, + value_id, + &[i as Word], + ); + body.push(instruction); + } + let mut instruction = Instruction::new(spirv::Op::EmitMeshTasksEXT); + for id in values { + instruction.add_operand(id); } - Ok(Instruction::return_void()) + // We have to include the task payload in our call + instruction.add_operand(task_payload); + Ok(instruction) } /// This writes the actual loop @@ -708,9 +633,10 @@ impl super::Writer { &mut self, return_info: &MeshReturnInfo, block: &mut Block, + loop_counter_vertices: u32, + loop_counter_primitives: u32, + local_invocation_index_id: Word, ) -> Result<(), Error> { - // Start with a control barrier so that everything that follows is guaranteed to see the same variables - self.write_control_barrier(crate::Barrier::WORK_GROUP, &mut block.body); let u32_id = self.get_u32_type_id(); // Load the actual vertex and primitive counts @@ -780,8 +706,6 @@ impl super::Writer { return_info.primitive_info.array_type_id, ); - self.write_control_barrier(crate::Barrier::WORK_GROUP, &mut block.body); - // This must be called exactly once before any other mesh outputs are written { let mut ins = Instruction::new(spirv::Op::SetMeshOutputsEXT); @@ -798,8 +722,8 @@ impl super::Writer { let func_end = self.id_gen.next(); block.body.push(Instruction::store( - return_info.loop_counter_vertices, - return_info.local_invocation_index_id, + loop_counter_vertices, + local_invocation_index_id, None, )); block.body.push(Instruction::branch(vertex_loop_header)); @@ -807,7 +731,7 @@ impl super::Writer { let vertex_copy_body = self.write_mesh_copy_body( false, return_info, - return_info.loop_counter_vertices, + loop_counter_vertices, vert_array_ptr, prim_array_ptr, ); @@ -818,7 +742,7 @@ impl super::Writer { vertex_loop_header, in_between_loops, vert_count_id, - return_info.loop_counter_vertices, + loop_counter_vertices, return_info, ); @@ -827,8 +751,8 @@ impl super::Writer { block.body.push(Instruction::label(in_between_loops)); block.body.push(Instruction::store( - return_info.loop_counter_primitives, - return_info.local_invocation_index_id, + loop_counter_primitives, + local_invocation_index_id, None, )); @@ -837,7 +761,7 @@ impl super::Writer { let primitive_copy_body = self.write_mesh_copy_body( true, return_info, - return_info.loop_counter_primitives, + loop_counter_primitives, vert_array_ptr, prim_array_ptr, ); @@ -848,11 +772,211 @@ impl super::Writer { prim_loop_header, func_end, prim_count_id, - return_info.loop_counter_primitives, + loop_counter_primitives, return_info, ); block.body.push(Instruction::label(func_end)); Ok(()) } + + pub(super) fn write_mesh_shader_wrapper( + &mut self, + return_info: &MeshReturnInfo, + inner_id: u32, + ) -> Result { + let out_id = self.id_gen.next(); + let mut function = super::Function::default(); + let lookup_function_type = super::LookupFunctionType { + parameter_type_ids: alloc::vec![], + return_type_id: self.void_type, + }; + let function_type = self.get_function_type(lookup_function_type); + function.signature = Some(Instruction::function( + self.void_type, + out_id, + spirv::FunctionControl::empty(), + function_type, + )); + let u32_id = self.get_u32_type_id(); + { + let mut block = Block::new(self.id_gen.next()); + // A general function variable that we guarantee to allow in the final return. It must be + // declared at the top of the function. Currently it is used in the memcpy part to keep + // track of the current index to copy. + let loop_counter_vertices = self.id_gen.next(); + let loop_counter_primitives = self.id_gen.next(); + block.body.insert( + 0, + Instruction::variable( + self.get_pointer_type_id(u32_id, spirv::StorageClass::Function), + loop_counter_vertices, + spirv::StorageClass::Function, + None, + ), + ); + block.body.insert( + 1, + Instruction::variable( + self.get_pointer_type_id(u32_id, spirv::StorageClass::Function), + loop_counter_primitives, + spirv::StorageClass::Function, + None, + ), + ); + let local_invocation_index_id = self.id_gen.next(); + block.body.push(Instruction::load( + u32_id, + local_invocation_index_id, + return_info.local_invocation_index_var_id, + None, + )); + block.body.push(Instruction::function_call( + self.void_type, + self.id_gen.next(), + inner_id, + &[], + )); + self.write_control_barrier(crate::Barrier::WORK_GROUP, &mut block.body); + self.write_mesh_shader_return( + return_info, + &mut block, + loop_counter_vertices, + loop_counter_primitives, + local_invocation_index_id, + )?; + function.consume(block, Instruction::return_void()); + } + function.to_words(&mut self.logical_layout.function_definitions); + Ok(out_id) + } + + pub(super) fn write_task_shader_wrapper( + &mut self, + task_payload: Word, + inner_id: u32, + ) -> Result { + let out_id = self.id_gen.next(); + let mut function = super::Function::default(); + let lookup_function_type = super::LookupFunctionType { + parameter_type_ids: alloc::vec![], + return_type_id: self.void_type, + }; + let function_type = self.get_function_type(lookup_function_type); + function.signature = Some(Instruction::function( + self.void_type, + out_id, + spirv::FunctionControl::empty(), + function_type, + )); + + { + let mut block = Block::new(self.id_gen.next()); + let result = self.id_gen.next(); + block.body.push(Instruction::function_call( + self.get_vec3u_type_id(), + result, + inner_id, + &[], + )); + self.write_control_barrier(crate::Barrier::WORK_GROUP, &mut block.body); + let final_value = if let Some(task_limits) = self.task_runtime_limits { + let zero_u32 = self.get_constant_scalar(crate::Literal::U32(0)); + // If its greater than 2<<21 then overflow is possible without being caught + let max_per_dim = self.get_constant_scalar(crate::Literal::U64( + task_limits.max_mesh_workgroups_per_dim.min(2 << 21) as u64, + )); + let max_total = self.get_constant_scalar(crate::Literal::U64( + task_limits.max_mesh_workgroups_total as u64, + )); + let u64_type_id = self + .get_numeric_type_id(crate::back::spv::NumericType::Scalar(crate::Scalar::U64)); + let values = [self.id_gen.next(), self.id_gen.next(), self.id_gen.next()]; + for (i, value) in values.into_iter().enumerate() { + let u32_val = self.id_gen.next(); + block.body.push(Instruction::composite_extract( + self.get_u32_type_id(), + u32_val, + result, + &[i as u32], + )); + block.body.push(Instruction::unary( + spirv::Op::UConvert, + u64_type_id, + value, + u32_val, + )); + } + let prod_1 = self.id_gen.next(); + block.body.push(Instruction::binary( + spirv::Op::IMul, + u64_type_id, + prod_1, + values[0], + values[1], + )); + let prod = self.id_gen.next(); + block.body.push(Instruction::binary( + spirv::Op::IMul, + u64_type_id, + prod, + prod_1, + values[2], + )); + let total_too_large = self.id_gen.next(); + block.body.push(Instruction::binary( + spirv::Op::UGreaterThanEqual, + self.get_bool_type_id(), + total_too_large, + prod, + max_total, + )); + + let too_large = [self.id_gen.next(), self.id_gen.next(), self.id_gen.next()]; + for (i, value) in values.into_iter().enumerate() { + block.body.push(Instruction::binary( + spirv::Op::UGreaterThanEqual, + self.get_bool_type_id(), + too_large[i], + value, + max_per_dim, + )); + } + let mut current = total_too_large; + for is_too_large in too_large { + let new = self.id_gen.next(); + block.body.push(Instruction::binary( + spirv::Op::LogicalOr, + self.get_bool_type_id(), + new, + current, + is_too_large, + )); + current = new; + } + let zero_vec3 = self.id_gen.next(); + block.body.push(Instruction::composite_construct( + self.get_vec3u_type_id(), + zero_vec3, + &[zero_u32, zero_u32, zero_u32], + )); + let final_result = self.id_gen.next(); + block.body.push(Instruction::select( + self.get_vec3u_type_id(), + final_result, + current, + zero_vec3, + result, + )); + final_result + } else { + result + }; + let ins = + self.write_entry_point_task_return(final_value, &mut block.body, task_payload)?; + function.consume(block, ins); + } + function.to_words(&mut self.logical_layout.function_definitions); + Ok(out_id) + } } diff --git a/naga/src/back/spv/mod.rs b/naga/src/back/spv/mod.rs index 2e67442583..64c9f8c81a 100644 --- a/naga/src/back/spv/mod.rs +++ b/naga/src/back/spv/mod.rs @@ -822,6 +822,8 @@ pub struct Writer { /// Non semantic debug printf extension `OpExtInstImport` debug_printf: Option, pub(crate) ray_query_initialization_tracking: bool, + + task_runtime_limits: Option, } bitflags::bitflags! { @@ -898,6 +900,12 @@ pub enum ZeroInitializeWorkgroupMemoryMode { None, } +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct TaskRuntimeLimits { + pub max_mesh_workgroups_per_dim: u32, + pub max_mesh_workgroups_total: u32, +} + #[derive(Debug, Clone)] pub struct Options<'a> { /// (Major, Minor) target version of the SPIR-V. @@ -939,6 +947,8 @@ pub struct Options<'a> { pub use_storage_input_output_16: bool, pub debug_info: Option>, + + pub task_runtime_limits: Option, } impl Default for Options<'_> { @@ -961,6 +971,7 @@ impl Default for Options<'_> { ray_query_initialization_tracking: true, use_storage_input_output_16: true, debug_info: None, + task_runtime_limits: None, } } } diff --git a/naga/src/back/spv/writer.rs b/naga/src/back/spv/writer.rs index 5a640ae40a..0ee8f9d16d 100644 --- a/naga/src/back/spv/writer.rs +++ b/naga/src/back/spv/writer.rs @@ -108,6 +108,7 @@ impl Writer { options.use_storage_input_output_16, ), debug_printf: None, + task_runtime_limits: options.task_runtime_limits, }) } @@ -126,6 +127,7 @@ impl Writer { self.binding_map = options.binding_map.clone(); self.io_f16_polyfills = super::f16_polyfill::F16IoPolyfill::new(options.use_storage_input_output_16); + self.task_runtime_limits = options.task_runtime_limits; Ok(()) } @@ -164,6 +166,7 @@ impl Writer { capabilities_available: take(&mut self.capabilities_available), fake_missing_bindings: self.fake_missing_bindings, binding_map: take(&mut self.binding_map), + task_runtime_limits: self.task_runtime_limits, // Initialized afresh: id_gen, @@ -401,14 +404,6 @@ impl Writer { self.get_pointer_type_id(vec2u_id, class) } - pub(super) fn get_vec3u_pointer_type_id(&mut self, class: spirv::StorageClass) -> Word { - let vec3u_id = self.get_numeric_type_id(NumericType::Vector { - size: crate::VectorSize::Tri, - scalar: crate::Scalar::U32, - }); - self.get_pointer_type_id(vec3u_id, class) - } - pub(super) fn get_bool_type_id(&mut self) -> Word { self.get_numeric_type_id(NumericType::Scalar(crate::Scalar::BOOL)) } @@ -766,10 +761,9 @@ impl Writer { mesh_state: None, }; - let mut local_invocation_id = None; - let mut parameter_type_ids = Vec::with_capacity(ir_function.arguments.len()); + let mut local_invocation_index_var_id = None; let mut local_invocation_index_id = None; for argument in ir_function.arguments.iter() { @@ -799,13 +793,9 @@ impl Writer { varying_id, argument_type_id, ); - - if binding == &crate::Binding::BuiltIn(crate::BuiltIn::LocalInvocationId) { - local_invocation_id = Some(id); - } else if binding - == &crate::Binding::BuiltIn(crate::BuiltIn::LocalInvocationIndex) - { + if binding == &crate::Binding::BuiltIn(crate::BuiltIn::LocalInvocationIndex) { local_invocation_index_id = Some(id); + local_invocation_index_var_id = Some(varying_id); } id @@ -830,13 +820,10 @@ impl Writer { let id = self.load_io_with_f16_polyfill(&mut prelude.body, varying_id, type_id); constituent_ids.push(id); - - if binding == &crate::Binding::BuiltIn(crate::BuiltIn::LocalInvocationId) { - local_invocation_id = Some(id); - } else if binding - == &crate::Binding::BuiltIn(crate::BuiltIn::LocalInvocationIndex) + if binding == &crate::Binding::BuiltIn(crate::BuiltIn::LocalInvocationIndex) { local_invocation_index_id = Some(id); + local_invocation_index_var_id = Some(varying_id); } } prelude.body.push(Instruction::composite_construct( @@ -964,7 +951,11 @@ impl Writer { .body .push(Instruction::store(varying_id, default_value_id, None)); } - self.void_type + if iface.stage == crate::ShaderStage::Task { + self.get_vec3u_type_id() + } else { + self.void_type + } } else { self.get_handle_type_id(result.ty) } @@ -980,9 +971,8 @@ impl Writer { } self.write_entry_point_mesh_shader_info( iface, - local_invocation_index_id, + local_invocation_index_var_id, ir_module, - &mut prelude, &mut ep_context, )?; } @@ -1220,7 +1210,7 @@ impl Writer { next_id, ir_module, info, - local_invocation_id, + local_invocation_index_id, interface, context.function, ), @@ -1245,7 +1235,21 @@ impl Writer { function.to_words(&mut self.logical_layout.function_definitions); - Ok(function_id) + if let Some(EntryPointContext { + mesh_state: Some(ref mesh_state), + .. + }) = function.entry_point_context + { + self.write_mesh_shader_wrapper(mesh_state, function_id) + } else if let Some(EntryPointContext { + task_payload_variable_id: Some(tp), + .. + }) = function.entry_point_context + { + self.write_task_shader_wrapper(tp, function_id) + } else { + Ok(function_id) + } } fn write_execution_mode( @@ -1268,6 +1272,7 @@ impl Writer { debug_info: &Option, ) -> Result { let mut interface_ids = Vec::new(); + let function_id = self.write_function( &entry_point.function, info, @@ -1989,7 +1994,7 @@ impl Writer { entry_id: Word, ir_module: &crate::Module, info: &FunctionInfo, - local_invocation_id: Option, + local_invocation_index: Option, interface: &mut FunctionInterface, function: &mut Function, ) -> Option { @@ -1997,7 +2002,10 @@ impl Writer { .global_variables .iter() .filter(|&(handle, var)| { - !info[handle].is_empty() && var.space == crate::AddressSpace::WorkGroup + let task_exception = (var.space == crate::AddressSpace::TaskPayload) + && interface.stage == crate::ShaderStage::Task; + !info[handle].is_empty() + && (var.space == crate::AddressSpace::WorkGroup || task_exception) }) .map(|(handle, var)| { // It's safe to use `var_id` here, not `access_id`, because only @@ -2014,16 +2022,15 @@ impl Writer { return None; } - let uint3_type_id = self.get_vec3u_type_id(); - let mut pre_if_block = Block::new(entry_id); - let local_invocation_id = if let Some(local_invocation_id) = local_invocation_id { - local_invocation_id + let local_invocation_index = if let Some(local_invocation_index) = local_invocation_index { + local_invocation_index } else { let varying_id = self.id_gen.next(); let class = spirv::StorageClass::Input; - let pointer_type_id = self.get_vec3u_pointer_type_id(class); + let u32_ty_id = self.get_u32_type_id(); + let pointer_type_id = self.get_pointer_type_id(u32_ty_id, class); Instruction::variable(pointer_type_id, varying_id, class, None) .to_words(&mut self.logical_layout.declarations); @@ -2031,39 +2038,29 @@ impl Writer { self.decorate( varying_id, spirv::Decoration::BuiltIn, - &[spirv::BuiltIn::LocalInvocationId as u32], + &[spirv::BuiltIn::LocalInvocationIndex as u32], ); interface.varying_ids.push(varying_id); let id = self.id_gen.next(); pre_if_block .body - .push(Instruction::load(uint3_type_id, id, varying_id, None)); + .push(Instruction::load(u32_ty_id, id, varying_id, None)); id }; - let zero_id = self.get_constant_null(uint3_type_id); - let bool3_type_id = self.get_vec3_bool_type_id(); + let zero_id = self.get_constant_scalar(crate::Literal::U32(0)); let eq_id = self.id_gen.next(); pre_if_block.body.push(Instruction::binary( spirv::Op::IEqual, - bool3_type_id, + self.get_bool_type_id(), eq_id, - local_invocation_id, + local_invocation_index, zero_id, )); - let condition_id = self.id_gen.next(); - let bool_type_id = self.get_bool_type_id(); - pre_if_block.body.push(Instruction::relational( - spirv::Op::All, - bool_type_id, - condition_id, - eq_id, - )); - let merge_id = self.id_gen.next(); pre_if_block.body.push(Instruction::selection_merge( merge_id, @@ -2073,7 +2070,7 @@ impl Writer { let accept_id = self.id_gen.next(); function.consume( pre_if_block, - Instruction::branch_conditional(condition_id, accept_id, merge_id), + Instruction::branch_conditional(eq_id, accept_id, merge_id), ); let accept_block = Block { @@ -2298,7 +2295,6 @@ impl Writer { } if per_primitive && stage == crate::ShaderStage::Fragment { others.push(Decoration::PerPrimitiveEXT); - self.require_mesh_shaders()?; } Ok(BindingDecorations::Location { location, @@ -2310,13 +2306,6 @@ impl Writer { use crate::BuiltIn as Bi; let mut others = ArrayVec::new(); - if matches!( - built_in, - Bi::CullPrimitive | Bi::PointIndex | Bi::LineIndices | Bi::TriangleIndices - ) { - self.require_mesh_shaders()?; - } - let built_in = match built_in { Bi::Position { invariant } => { if invariant { @@ -2428,22 +2417,12 @@ impl Writer { BuiltIn::SubgroupLocalInvocationId } Bi::CullPrimitive => { - self.require_mesh_shaders()?; others.push(Decoration::PerPrimitiveEXT); BuiltIn::CullPrimitiveEXT } - Bi::PointIndex => { - self.require_mesh_shaders()?; - BuiltIn::PrimitivePointIndicesEXT - } - Bi::LineIndices => { - self.require_mesh_shaders()?; - BuiltIn::PrimitiveLineIndicesEXT - } - Bi::TriangleIndices => { - self.require_mesh_shaders()?; - BuiltIn::PrimitiveTriangleIndicesEXT - } + Bi::PointIndex => BuiltIn::PrimitivePointIndicesEXT, + Bi::LineIndices => BuiltIn::PrimitiveLineIndicesEXT, + Bi::TriangleIndices => BuiltIn::PrimitiveTriangleIndicesEXT, // No decoration, this EmitMeshTasksEXT is called at function return Bi::MeshTaskSize => return Ok(BindingDecorations::None), // These aren't normal builtins and don't occur in function output @@ -2790,17 +2769,6 @@ impl Writer { | ir_module.special_types.ray_intersection.is_some(); let has_vertex_return = ir_module.special_types.ray_vertex_return.is_some(); - // Ways mesh shaders are required: - // * Mesh entry point used - checked for - // * Mesh function like setVertex used outside mesh entry point, this is handled when those are written - // * Fragment shader with per primitive data - handled in `map_binding` - let has_mesh_shaders = ir_module.entry_points.iter().any(|entry| { - entry.stage == crate::ShaderStage::Mesh || entry.stage == crate::ShaderStage::Task - }) || ir_module - .global_variables - .iter() - .any(|gvar| gvar.1.space == crate::AddressSpace::TaskPayload); - for (_, &crate::Type { ref inner, .. }) in ir_module.types.iter() { // spirv does not know whether these have vertex return - that is done by us if let &crate::TypeInner::AccelerationStructure { .. } @@ -2827,8 +2795,13 @@ impl Writer { Instruction::extension("SPV_KHR_ray_tracing_position_fetch") .to_words(&mut self.logical_layout.extensions); } - if has_mesh_shaders { - self.require_mesh_shaders()?; + if ir_module.uses_mesh_shaders() { + self.use_extension("SPV_EXT_mesh_shader"); + self.require_any("Mesh Shaders", &[spirv::Capability::MeshShadingEXT])?; + let lang_version = self.lang_version(); + if lang_version.0 <= 1 && lang_version.1 < 4 { + return Err(Error::SpirvVersionTooLow(1, 4)); + } } Instruction::type_void(self.void_type).to_words(&mut self.logical_layout.declarations); Instruction::ext_inst_import(self.gl450_ext_inst_id, "GLSL.std.450") diff --git a/naga/src/back/wgsl/writer.rs b/naga/src/back/wgsl/writer.rs index bbe1a1fb95..fb3a51dc22 100644 --- a/naga/src/back/wgsl/writer.rs +++ b/naga/src/back/wgsl/writer.rs @@ -278,7 +278,6 @@ impl Writer { let mut needs_f16 = false; let mut needs_dual_source_blending = false; let mut needs_clip_distances = false; - let mut needs_mesh_shaders = false; // Determine which `enable` declarations are needed for (_, ty) in module.types.iter() { @@ -299,25 +298,6 @@ impl Writer { crate::Binding::BuiltIn(crate::BuiltIn::ClipDistance) => { needs_clip_distances = true; } - crate::Binding::Location { - per_primitive: true, - .. - } => { - needs_mesh_shaders = true; - } - crate::Binding::BuiltIn( - crate::BuiltIn::MeshTaskSize - | crate::BuiltIn::CullPrimitive - | crate::BuiltIn::PointIndex - | crate::BuiltIn::LineIndices - | crate::BuiltIn::TriangleIndices - | crate::BuiltIn::VertexCount - | crate::BuiltIn::Vertices - | crate::BuiltIn::PrimitiveCount - | crate::BuiltIn::Primitives, - ) => { - needs_mesh_shaders = true; - } _ => {} } } @@ -326,22 +306,6 @@ impl Writer { } } - if module - .entry_points - .iter() - .any(|ep| matches!(ep.stage, ShaderStage::Mesh | ShaderStage::Task)) - { - needs_mesh_shaders = true; - } - - if module - .global_variables - .iter() - .any(|gv| gv.1.space == crate::AddressSpace::TaskPayload) - { - needs_mesh_shaders = true; - } - // Write required declarations let mut any_written = false; if needs_f16 { @@ -356,7 +320,7 @@ impl Writer { writeln!(self.out, "enable clip_distances;")?; any_written = true; } - if needs_mesh_shaders { + if module.uses_mesh_shaders() { writeln!(self.out, "enable wgpu_mesh_shader;")?; any_written = true; } diff --git a/naga/src/proc/mod.rs b/naga/src/proc/mod.rs index bc1e72e113..a5a28ad8c0 100644 --- a/naga/src/proc/mod.rs +++ b/naga/src/proc/mod.rs @@ -662,6 +662,14 @@ impl super::ShaderStage { Self::Compute | Self::Task | Self::Mesh => true, } } + + /// Mesh or task shader + pub const fn mesh_like(self) -> bool { + match self { + Self::Task | Self::Mesh => true, + Self::Vertex | Self::Fragment | Self::Compute => false, + } + } } #[test] @@ -851,4 +859,64 @@ impl crate::Module { .map(|a| a.with_span_handle(self.global_variables[gv].ty, &self.types)), ) } + + pub fn uses_mesh_shaders(&self) -> bool { + for (_, ty) in self.types.iter() { + match ty.inner { + crate::TypeInner::Struct { ref members, .. } => { + for member in members { + if matches!( + member.binding, + Some(crate::Binding::BuiltIn( + crate::BuiltIn::MeshTaskSize + | crate::BuiltIn::CullPrimitive + | crate::BuiltIn::PointIndex + | crate::BuiltIn::LineIndices + | crate::BuiltIn::TriangleIndices + | crate::BuiltIn::VertexCount + | crate::BuiltIn::Vertices + | crate::BuiltIn::PrimitiveCount + | crate::BuiltIn::Primitives, + )) + ) { + return true; + } + } + } + _ => (), + } + } + if self.entry_points.iter().any(|ep| { + matches!( + ep.stage, + crate::ShaderStage::Mesh | crate::ShaderStage::Task + ) + }) { + return true; + } + if self + .global_variables + .iter() + .any(|gv| gv.1.space == crate::AddressSpace::TaskPayload) + { + return true; + } + false + } +} + +impl crate::MeshOutputTopology { + pub const fn to_builtin(self) -> crate::BuiltIn { + match self { + Self::Points => crate::BuiltIn::PointIndex, + Self::Lines => crate::BuiltIn::LineIndices, + Self::Triangles => crate::BuiltIn::TriangleIndices, + } + } +} + +impl crate::AddressSpace { + pub const fn is_workgroup_like(self) -> bool { + matches!(self, Self::WorkGroup | Self::TaskPayload) + } } diff --git a/naga/src/valid/analyzer.rs b/naga/src/valid/analyzer.rs index 7f759ee2dc..92cc2d3978 100644 --- a/naga/src/valid/analyzer.rs +++ b/naga/src/valid/analyzer.rs @@ -269,7 +269,7 @@ pub struct FunctionInfo { /// `FunctionInfo` implements `core::ops::Index>`, /// so you can simply index this struct with a global handle to retrieve /// its usage information. - global_uses: Box<[GlobalUse]>, + pub global_uses: Box<[GlobalUse]>, /// Information about each expression in this function's body. /// diff --git a/naga/src/valid/interface.rs b/naga/src/valid/interface.rs index faa0047832..e5b1e9ed2a 100644 --- a/naga/src/valid/interface.rs +++ b/naga/src/valid/interface.rs @@ -1087,9 +1087,11 @@ impl super::Validator { } // Task shaders must have a single `MeshTaskSize` output, and nothing else. if ep.stage == crate::ShaderStage::Task { - let ok = result_built_ins.contains(&crate::BuiltIn::MeshTaskSize) - && result_built_ins.len() == 1 - && self.location_mask.is_empty(); + let ok = module.types[fr.ty].inner + == crate::TypeInner::Vector { + size: crate::VectorSize::Tri, + scalar: crate::Scalar::U32, + }; if !ok { return Err(EntryPointError::WrongTaskShaderEntryResult.with_span()); } diff --git a/naga/tests/in/wgsl/mesh-shader-empty.wgsl b/naga/tests/in/wgsl/mesh-shader-empty.wgsl index 98a6bf8448..27ed996611 100644 --- a/naga/tests/in/wgsl/mesh-shader-empty.wgsl +++ b/naga/tests/in/wgsl/mesh-shader-empty.wgsl @@ -17,7 +17,7 @@ var taskPayload: TaskPayload; @task @payload(taskPayload) -@workgroup_size(1) +@workgroup_size(64) fn ts_main() -> @builtin(mesh_task_size) vec3 { return vec3(1, 1, 1); } @@ -33,5 +33,5 @@ var mesh_output: MeshOutput; @mesh(mesh_output) @payload(taskPayload) -@workgroup_size(1) +@workgroup_size(64) fn ms_main() {} diff --git a/naga/tests/in/wgsl/mesh-shader-lines.wgsl b/naga/tests/in/wgsl/mesh-shader-lines.wgsl index c475ff1061..c07bc2a7a5 100644 --- a/naga/tests/in/wgsl/mesh-shader-lines.wgsl +++ b/naga/tests/in/wgsl/mesh-shader-lines.wgsl @@ -17,7 +17,7 @@ var taskPayload: TaskPayload; @task @payload(taskPayload) -@workgroup_size(1) +@workgroup_size(64) fn ts_main() -> @builtin(mesh_task_size) vec3 { return vec3(1, 1, 1); } @@ -33,5 +33,5 @@ var mesh_output: MeshOutput; @mesh(mesh_output) @payload(taskPayload) -@workgroup_size(1) +@workgroup_size(64) fn ms_main() {} diff --git a/naga/tests/in/wgsl/mesh-shader-points.wgsl b/naga/tests/in/wgsl/mesh-shader-points.wgsl index 84516ee8f2..31429e2df5 100644 --- a/naga/tests/in/wgsl/mesh-shader-points.wgsl +++ b/naga/tests/in/wgsl/mesh-shader-points.wgsl @@ -17,7 +17,7 @@ var taskPayload: TaskPayload; @task @payload(taskPayload) -@workgroup_size(1) +@workgroup_size(64) fn ts_main() -> @builtin(mesh_task_size) vec3 { return vec3(1, 1, 1); } @@ -33,5 +33,5 @@ var mesh_output: MeshOutput; @mesh(mesh_output) @payload(taskPayload) -@workgroup_size(1) +@workgroup_size(64) fn ms_main() {} diff --git a/naga/tests/in/wgsl/mesh-shader.wgsl b/naga/tests/in/wgsl/mesh-shader.wgsl index ca2f9c911a..3e8740ec56 100644 --- a/naga/tests/in/wgsl/mesh-shader.wgsl +++ b/naga/tests/in/wgsl/mesh-shader.wgsl @@ -31,28 +31,25 @@ struct PrimitiveInput { var taskPayload: TaskPayload; var workgroupData: f32; -@task -@payload(taskPayload) -@workgroup_size(1) -fn ts_main() -> @builtin(mesh_task_size) vec3 { - workgroupData = 1.0; - taskPayload.colorMask = vec4(1.0, 1.0, 0.0, 1.0); - taskPayload.visible = true; - return vec3(1, 1, 1); +fn helper_reader() -> bool { + return taskPayload.visible; +} +fn helper_writer(value: bool) { + taskPayload.visible = value; } -// This tests if we can properly write a task shader that is divergent @task @payload(taskPayload) -@workgroup_size(2) -fn ts_divergent(@builtin(local_invocation_index) thread_id: u32) -> @builtin(mesh_task_size) vec3 { - if thread_id == 0 { +@workgroup_size(64) +fn ts_main(@builtin(local_invocation_id) thread_id: vec3) -> @builtin(mesh_task_size) vec3 { + if thread_id.x == 0 { taskPayload.colorMask = vec4(1.0, 1.0, 0.0, 1.0); - taskPayload.visible = true; + helper_writer(true); + taskPayload.visible = helper_reader(); return vec3(1, 1, 1); } // Only the first thread's value is taken - return vec3(2,2,2); + return vec3(0, 0, 0); } struct MeshOutput { @@ -66,52 +63,33 @@ var mesh_output: MeshOutput; @mesh(mesh_output) @payload(taskPayload) -@workgroup_size(1) -fn ms_main() { - mesh_output.vertex_count = 3; - mesh_output.primitive_count = 1; - workgroupData = 2.0; - - mesh_output.vertices[0].position = positions[0]; - mesh_output.vertices[0].color = colors[0] * taskPayload.colorMask; - - mesh_output.vertices[1].position = positions[1]; - mesh_output.vertices[1].color = colors[1] * taskPayload.colorMask; - - mesh_output.vertices[2].position = positions[2]; - mesh_output.vertices[2].color = colors[2] * taskPayload.colorMask; - - mesh_output.primitives[0].indices = vec3(0, 1, 2); - mesh_output.primitives[0].cull = !taskPayload.visible; - mesh_output.primitives[0].colorMask = vec4(1.0, 0.0, 1.0, 1.0); -} - -@mesh(mesh_output) -@workgroup_size(1) -fn ms_no_ts() { - mesh_output.vertex_count = 3; - mesh_output.primitive_count = 1; - workgroupData = 2.0; +@workgroup_size(64) +fn ms_main(@builtin(local_invocation_id) thread_id: vec3) { + if thread_id.x == 0 { + mesh_output.vertex_count = 3; + mesh_output.primitive_count = 1; + workgroupData = 2.0; - mesh_output.vertices[0].position = positions[0]; - mesh_output.vertices[0].color = colors[0]; + mesh_output.vertices[0].position = positions[0]; + mesh_output.vertices[0].color = colors[0] * taskPayload.colorMask; - mesh_output.vertices[1].position = positions[1]; - mesh_output.vertices[1].color = colors[1]; + mesh_output.vertices[1].position = positions[1]; + mesh_output.vertices[1].color = colors[1] * taskPayload.colorMask; - mesh_output.vertices[2].position = positions[2]; - mesh_output.vertices[2].color = colors[2]; + mesh_output.vertices[2].position = positions[2]; + mesh_output.vertices[2].color = colors[2] * taskPayload.colorMask; - mesh_output.primitives[0].indices = vec3(0, 1, 2); - mesh_output.primitives[0].cull = false; - mesh_output.primitives[0].colorMask = vec4(1.0, 0.0, 1.0, 1.0); + mesh_output.primitives[0].indices = vec3(0, 1, 2); + mesh_output.primitives[0].cull = !helper_reader(); + mesh_output.primitives[0].colorMask = vec4(1.0, 0.0, 1.0, 1.0); + return; + } } -// See ts_divergent comment @mesh(mesh_output) -@workgroup_size(1) -fn ms_divergent(@builtin(local_invocation_index) thread_id: u32) { - if thread_id == 0 { +@workgroup_size(64) +fn ms_no_ts(@builtin(local_invocation_id) thread_id: vec3) { + if thread_id.x == 0 { mesh_output.vertex_count = 3; mesh_output.primitive_count = 1; workgroupData = 2.0; @@ -128,7 +106,6 @@ fn ms_divergent(@builtin(local_invocation_index) thread_id: u32) { mesh_output.primitives[0].indices = vec3(0, 1, 2); mesh_output.primitives[0].cull = false; mesh_output.primitives[0].colorMask = vec4(1.0, 0.0, 1.0, 1.0); - // "Early" return return; } } diff --git a/naga/tests/out/spv/wgsl-abstract-types-operators.spvasm b/naga/tests/out/spv/wgsl-abstract-types-operators.spvasm index 4ce80049d2..c64e2c1b76 100644 --- a/naga/tests/out/spv/wgsl-abstract-types-operators.spvasm +++ b/naga/tests/out/spv/wgsl-abstract-types-operators.spvasm @@ -1,14 +1,14 @@ ; SPIR-V ; Version: 1.1 ; Generator: rspirv -; Bound: 131 +; Bound: 127 OpCapability Shader %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 -OpEntryPoint GLCompute %112 "main" %116 +OpEntryPoint GLCompute %112 "main" %115 OpExecutionMode %112 LocalSize 1 1 1 OpDecorate %6 ArrayStride 4 -OpDecorate %116 BuiltIn LocalInvocationId +OpDecorate %115 BuiltIn LocalInvocationIndex %2 = OpTypeVoid %3 = OpTypeFloat 32 %4 = OpTypeInt 32 1 @@ -56,13 +56,10 @@ OpDecorate %116 BuiltIn LocalInvocationId %102 = OpConstant %3 7 %108 = OpTypePointer Workgroup %5 %114 = OpConstantNull %6 -%115 = OpTypeVector %5 3 -%117 = OpTypePointer Input %115 -%116 = OpVariable %117 Input -%119 = OpConstantNull %115 -%121 = OpTypeBool -%120 = OpTypeVector %121 3 -%126 = OpConstant %5 264 +%116 = OpTypePointer Input %5 +%115 = OpVariable %116 Input +%119 = OpTypeBool +%122 = OpConstant %5 264 %20 = OpFunction %2 None %21 %19 = OpLabel %63 = OpVariable %36 Function %64 @@ -152,20 +149,19 @@ OpFunctionEnd %111 = OpLabel OpBranch %113 %113 = OpLabel -%118 = OpLoad %115 %116 -%122 = OpIEqual %120 %118 %119 -%123 = OpAll %121 %122 -OpSelectionMerge %124 None -OpBranchConditional %123 %125 %124 -%125 = OpLabel +%117 = OpLoad %5 %115 +%118 = OpIEqual %119 %117 %11 +OpSelectionMerge %120 None +OpBranchConditional %118 %121 %120 +%121 = OpLabel OpStore %17 %114 -OpBranch %124 -%124 = OpLabel -OpControlBarrier %30 %30 %126 -OpBranch %127 -%127 = OpLabel -%128 = OpFunctionCall %2 %20 -%129 = OpFunctionCall %2 %100 -%130 = OpFunctionCall %2 %105 +OpBranch %120 +%120 = OpLabel +OpControlBarrier %30 %30 %122 +OpBranch %123 +%123 = OpLabel +%124 = OpFunctionCall %2 %20 +%125 = OpFunctionCall %2 %100 +%126 = OpFunctionCall %2 %105 OpReturn OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/wgsl-atomicOps-int64.spvasm b/naga/tests/out/spv/wgsl-atomicOps-int64.spvasm index d3c2b26be9..c544556caa 100644 --- a/naga/tests/out/spv/wgsl-atomicOps-int64.spvasm +++ b/naga/tests/out/spv/wgsl-atomicOps-int64.spvasm @@ -8,7 +8,7 @@ OpCapability Int64 OpExtension "SPV_KHR_storage_buffer_storage_class" %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 -OpEntryPoint GLCompute %32 "cs_main" %29 +OpEntryPoint GLCompute %32 "cs_main" %29 %49 OpExecutionMode %32 LocalSize 2 1 1 OpDecorate %5 ArrayStride 8 OpMemberDecorate %8 0 Offset 0 @@ -30,6 +30,7 @@ OpDecorate %19 Binding 2 OpDecorate %20 Block OpMemberDecorate %20 0 Offset 0 OpDecorate %29 BuiltIn LocalInvocationId +OpDecorate %49 BuiltIn LocalInvocationIndex %2 = OpTypeVoid %3 = OpTypeInt 64 0 %4 = OpTypeInt 64 1 @@ -70,8 +71,8 @@ OpDecorate %29 BuiltIn LocalInvocationId %46 = OpConstantNull %3 %47 = OpConstantNull %5 %48 = OpConstantNull %8 -%49 = OpConstantNull %9 -%50 = OpTypeVector %10 3 +%50 = OpTypePointer Input %7 +%49 = OpVariable %50 Input %55 = OpConstant %7 264 %58 = OpTypeInt 32 1 %57 = OpConstant %58 1 @@ -87,8 +88,8 @@ OpDecorate %29 BuiltIn LocalInvocationId %40 = OpAccessChain %39 %19 %35 OpBranch %45 %45 = OpLabel -%51 = OpIEqual %50 %31 %49 -%52 = OpAll %10 %51 +%51 = OpLoad %7 %49 +%52 = OpIEqual %10 %51 %35 OpSelectionMerge %53 None OpBranchConditional %52 %54 %53 %54 = OpLabel diff --git a/naga/tests/out/spv/wgsl-atomicOps.spvasm b/naga/tests/out/spv/wgsl-atomicOps.spvasm index 839200d769..70f07928d5 100644 --- a/naga/tests/out/spv/wgsl-atomicOps.spvasm +++ b/naga/tests/out/spv/wgsl-atomicOps.spvasm @@ -6,7 +6,7 @@ OpCapability Shader OpExtension "SPV_KHR_storage_buffer_storage_class" %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 -OpEntryPoint GLCompute %31 "cs_main" %28 +OpEntryPoint GLCompute %31 "cs_main" %28 %47 OpExecutionMode %31 LocalSize 2 1 1 OpDecorate %5 ArrayStride 4 OpMemberDecorate %7 0 Offset 0 @@ -28,6 +28,7 @@ OpDecorate %18 Binding 2 OpDecorate %19 Block OpMemberDecorate %19 0 Offset 0 OpDecorate %28 BuiltIn LocalInvocationId +OpDecorate %47 BuiltIn LocalInvocationIndex %2 = OpTypeVoid %3 = OpTypeInt 32 0 %4 = OpTypeInt 32 1 @@ -66,8 +67,8 @@ OpDecorate %28 BuiltIn LocalInvocationId %44 = OpConstantNull %3 %45 = OpConstantNull %5 %46 = OpConstantNull %7 -%47 = OpConstantNull %8 -%48 = OpTypeVector %9 3 +%48 = OpTypePointer Input %3 +%47 = OpVariable %48 Input %53 = OpConstant %3 264 %55 = OpTypePointer StorageBuffer %4 %59 = OpTypePointer Workgroup %4 @@ -79,8 +80,8 @@ OpDecorate %28 BuiltIn LocalInvocationId %39 = OpAccessChain %38 %18 %34 OpBranch %43 %43 = OpLabel -%49 = OpIEqual %48 %30 %47 -%50 = OpAll %9 %49 +%49 = OpLoad %3 %47 +%50 = OpIEqual %9 %49 %34 OpSelectionMerge %51 None OpBranchConditional %50 %52 %51 %52 = OpLabel diff --git a/naga/tests/out/spv/wgsl-globals.spvasm b/naga/tests/out/spv/wgsl-globals.spvasm index 1b5c7a3122..4be9b15701 100644 --- a/naga/tests/out/spv/wgsl-globals.spvasm +++ b/naga/tests/out/spv/wgsl-globals.spvasm @@ -1,12 +1,12 @@ ; SPIR-V ; Version: 1.1 ; Generator: rspirv -; Bound: 171 +; Bound: 167 OpCapability Shader OpExtension "SPV_KHR_storage_buffer_storage_class" %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 -OpEntryPoint GLCompute %93 "main" %116 +OpEntryPoint GLCompute %93 "main" %115 OpExecutionMode %93 LocalSize 1 1 1 OpDecorate %5 ArrayStride 4 OpMemberDecorate %9 0 Offset 0 @@ -52,7 +52,7 @@ OpDecorate %49 Block OpMemberDecorate %49 0 Offset 0 OpMemberDecorate %49 0 ColMajor OpMemberDecorate %49 0 MatrixStride 8 -OpDecorate %116 BuiltIn LocalInvocationId +OpDecorate %115 BuiltIn LocalInvocationIndex %2 = OpTypeVoid %3 = OpTypeBool %4 = OpTypeFloat 32 @@ -126,27 +126,24 @@ OpDecorate %116 BuiltIn LocalInvocationId %111 = OpTypePointer Function %3 %113 = OpConstantNull %5 %114 = OpConstantNull %7 -%115 = OpTypeVector %7 3 -%117 = OpTypePointer Input %115 -%116 = OpVariable %117 Input -%119 = OpConstantNull %115 -%120 = OpTypeVector %3 3 -%125 = OpConstant %7 264 -%128 = OpTypePointer Workgroup %4 -%129 = OpTypePointer Uniform %21 -%130 = OpTypePointer Uniform %20 -%133 = OpTypePointer Uniform %17 -%134 = OpTypePointer Uniform %16 -%135 = OpTypePointer Uniform %12 -%140 = OpConstant %7 7 -%146 = OpConstant %7 6 -%148 = OpTypePointer StorageBuffer %10 -%149 = OpConstant %7 1 -%152 = OpConstant %7 5 -%154 = OpTypePointer Uniform %4 -%155 = OpConstant %7 3 -%158 = OpConstant %7 4 -%170 = OpConstant %23 2 +%116 = OpTypePointer Input %7 +%115 = OpVariable %116 Input +%121 = OpConstant %7 264 +%124 = OpTypePointer Workgroup %4 +%125 = OpTypePointer Uniform %21 +%126 = OpTypePointer Uniform %20 +%129 = OpTypePointer Uniform %17 +%130 = OpTypePointer Uniform %16 +%131 = OpTypePointer Uniform %12 +%136 = OpConstant %7 7 +%142 = OpConstant %7 6 +%144 = OpTypePointer StorageBuffer %10 +%145 = OpConstant %7 1 +%148 = OpConstant %7 5 +%150 = OpTypePointer Uniform %4 +%151 = OpConstant %7 3 +%154 = OpConstant %7 4 +%166 = OpConstant %23 2 %53 = OpFunction %2 None %54 %52 = OpFunctionParameter %8 %51 = OpLabel @@ -198,56 +195,55 @@ OpFunctionEnd %106 = OpAccessChain %105 %48 %60 OpBranch %112 %112 = OpLabel -%118 = OpLoad %115 %116 -%121 = OpIEqual %120 %118 %119 -%122 = OpAll %3 %121 -OpSelectionMerge %123 None -OpBranchConditional %122 %124 %123 -%124 = OpLabel +%117 = OpLoad %7 %115 +%118 = OpIEqual %3 %117 %60 +OpSelectionMerge %119 None +OpBranchConditional %118 %120 %119 +%120 = OpLabel OpStore %26 %113 OpStore %28 %114 -OpBranch %123 -%123 = OpLabel -OpControlBarrier %18 %18 %125 -OpBranch %126 -%126 = OpLabel -%127 = OpFunctionCall %2 %57 -%131 = OpAccessChain %130 %106 %60 %60 -%132 = OpLoad %20 %131 -%136 = OpAccessChain %135 %104 %60 %60 %60 -%137 = OpLoad %12 %136 -%138 = OpMatrixTimesVector %10 %132 %137 -%139 = OpCompositeExtract %4 %138 0 -%141 = OpAccessChain %128 %26 %140 -OpStore %141 %139 -%142 = OpLoad %15 %102 -%143 = OpLoad %8 %100 -%144 = OpMatrixTimesVector %10 %142 %143 -%145 = OpCompositeExtract %4 %144 0 -%147 = OpAccessChain %128 %26 %146 -OpStore %147 %145 -%150 = OpAccessChain %73 %96 %149 %149 -%151 = OpLoad %4 %150 -%153 = OpAccessChain %128 %26 %152 -OpStore %153 %151 -%156 = OpAccessChain %154 %98 %60 %155 +OpBranch %119 +%119 = OpLabel +OpControlBarrier %18 %18 %121 +OpBranch %122 +%122 = OpLabel +%123 = OpFunctionCall %2 %57 +%127 = OpAccessChain %126 %106 %60 %60 +%128 = OpLoad %20 %127 +%132 = OpAccessChain %131 %104 %60 %60 %60 +%133 = OpLoad %12 %132 +%134 = OpMatrixTimesVector %10 %128 %133 +%135 = OpCompositeExtract %4 %134 0 +%137 = OpAccessChain %124 %26 %136 +OpStore %137 %135 +%138 = OpLoad %15 %102 +%139 = OpLoad %8 %100 +%140 = OpMatrixTimesVector %10 %138 %139 +%141 = OpCompositeExtract %4 %140 0 +%143 = OpAccessChain %124 %26 %142 +OpStore %143 %141 +%146 = OpAccessChain %73 %96 %145 %145 +%147 = OpLoad %4 %146 +%149 = OpAccessChain %124 %26 %148 +OpStore %149 %147 +%152 = OpAccessChain %150 %98 %60 %151 +%153 = OpLoad %4 %152 +%155 = OpAccessChain %124 %26 %154 +OpStore %155 %153 +%156 = OpAccessChain %73 %94 %145 %157 = OpLoad %4 %156 -%159 = OpAccessChain %128 %26 %158 -OpStore %159 %157 -%160 = OpAccessChain %73 %94 %149 -%161 = OpLoad %4 %160 -%162 = OpAccessChain %128 %26 %155 -OpStore %162 %161 -%163 = OpAccessChain %73 %94 %60 %60 -%164 = OpLoad %4 %163 -%165 = OpAccessChain %128 %26 %18 +%158 = OpAccessChain %124 %26 %151 +OpStore %158 %157 +%159 = OpAccessChain %73 %94 %60 %60 +%160 = OpLoad %4 %159 +%161 = OpAccessChain %124 %26 %18 +OpStore %161 %160 +%162 = OpAccessChain %73 %94 %145 +OpStore %162 %107 +%163 = OpArrayLength %7 %33 0 +%164 = OpConvertUToF %4 %163 +%165 = OpAccessChain %124 %26 %145 OpStore %165 %164 -%166 = OpAccessChain %73 %94 %149 -OpStore %166 %107 -%167 = OpArrayLength %7 %33 0 -%168 = OpConvertUToF %4 %167 -%169 = OpAccessChain %128 %26 %149 -OpStore %169 %168 -OpAtomicStore %28 %170 %60 %18 +OpAtomicStore %28 %166 %60 %18 OpReturn OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/wgsl-interface.compute.spvasm b/naga/tests/out/spv/wgsl-interface.compute.spvasm index 912d28d5b0..c3fb080b73 100644 --- a/naga/tests/out/spv/wgsl-interface.compute.spvasm +++ b/naga/tests/out/spv/wgsl-interface.compute.spvasm @@ -1,7 +1,7 @@ ; SPIR-V ; Version: 1.0 ; Generator: rspirv -; Bound: 53 +; Bound: 50 OpCapability Shader %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 @@ -43,12 +43,10 @@ OpDecorate %27 BuiltIn NumWorkgroups %27 = OpVariable %18 Input %30 = OpTypeFunction %2 %32 = OpConstantNull %9 -%33 = OpConstantNull %11 -%34 = OpTypeVector %8 3 -%39 = OpConstant %6 2 -%40 = OpConstant %6 264 -%42 = OpTypePointer Workgroup %6 -%51 = OpConstant %6 0 +%33 = OpConstant %6 0 +%37 = OpConstant %6 2 +%38 = OpConstant %6 264 +%40 = OpTypePointer Workgroup %6 %29 = OpFunction %2 None %30 %16 = OpLabel %19 = OpLoad %11 %17 @@ -58,26 +56,25 @@ OpDecorate %27 BuiltIn NumWorkgroups %28 = OpLoad %11 %27 OpBranch %31 %31 = OpLabel -%35 = OpIEqual %34 %21 %33 -%36 = OpAll %8 %35 -OpSelectionMerge %37 None -OpBranchConditional %36 %38 %37 -%38 = OpLabel +%34 = OpIEqual %8 %24 %33 +OpSelectionMerge %35 None +OpBranchConditional %34 %36 %35 +%36 = OpLabel OpStore %14 %32 -OpBranch %37 -%37 = OpLabel -OpControlBarrier %39 %39 %40 -OpBranch %41 -%41 = OpLabel -%43 = OpCompositeExtract %6 %19 0 -%44 = OpCompositeExtract %6 %21 0 -%45 = OpIAdd %6 %43 %44 -%46 = OpIAdd %6 %45 %24 -%47 = OpCompositeExtract %6 %26 0 +OpBranch %35 +%35 = OpLabel +OpControlBarrier %37 %37 %38 +OpBranch %39 +%39 = OpLabel +%41 = OpCompositeExtract %6 %19 0 +%42 = OpCompositeExtract %6 %21 0 +%43 = OpIAdd %6 %41 %42 +%44 = OpIAdd %6 %43 %24 +%45 = OpCompositeExtract %6 %26 0 +%46 = OpIAdd %6 %44 %45 +%47 = OpCompositeExtract %6 %28 0 %48 = OpIAdd %6 %46 %47 -%49 = OpCompositeExtract %6 %28 0 -%50 = OpIAdd %6 %48 %49 -%52 = OpAccessChain %42 %14 %51 -OpStore %52 %50 +%49 = OpAccessChain %40 %14 %33 +OpStore %49 %48 OpReturn OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/wgsl-mesh-shader-empty.spvasm b/naga/tests/out/spv/wgsl-mesh-shader-empty.spvasm index 9abe629377..f0af5ff35b 100644 --- a/naga/tests/out/spv/wgsl-mesh-shader-empty.spvasm +++ b/naga/tests/out/spv/wgsl-mesh-shader-empty.spvasm @@ -1,23 +1,24 @@ ; SPIR-V ; Version: 1.4 ; Generator: rspirv -; Bound: 100 +; Bound: 130 OpCapability Shader OpCapability MeshShadingEXT +OpCapability Int64 OpExtension "SPV_EXT_mesh_shader" %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 -OpEntryPoint TaskEXT %20 "ts_main" %15 -OpEntryPoint MeshEXT %44 "ms_main" %15 %30 %40 %43 %17 %47 -OpExecutionMode %20 LocalSize 1 1 1 -OpExecutionMode %44 LocalSize 1 1 1 -OpExecutionMode %44 OutputTrianglesNV -OpExecutionMode %44 OutputVertices 3 -OpExecutionMode %44 OutputPrimitivesNV 1 -OpDecorate %30 BuiltIn LocalInvocationIndex -OpMemberDecorate %37 0 BuiltIn Position -OpDecorate %37 Block -OpDecorate %43 BuiltIn PrimitiveTriangleIndicesEXT +OpEntryPoint TaskEXT %36 "ts_main" %15 %25 +OpEntryPoint MeshEXT %82 "ms_main" %15 %64 %69 %72 %17 %76 +OpExecutionMode %36 LocalSize 64 1 1 +OpExecutionMode %82 LocalSize 64 1 1 +OpExecutionMode %82 OutputTrianglesNV +OpExecutionMode %82 OutputVertices 3 +OpExecutionMode %82 OutputPrimitivesNV 1 +OpDecorate %64 BuiltIn LocalInvocationIndex +OpMemberDecorate %66 0 BuiltIn Position +OpDecorate %66 Block +OpDecorate %72 BuiltIn PrimitiveTriangleIndicesEXT OpMemberDecorate %4 0 Offset 0 OpMemberDecorate %7 0 Offset 0 OpMemberDecorate %9 0 Offset 0 @@ -27,7 +28,8 @@ OpMemberDecorate %14 0 Offset 0 OpMemberDecorate %14 1 Offset 48 OpMemberDecorate %14 2 Offset 64 OpMemberDecorate %14 3 Offset 68 -OpDecorate %47 BuiltIn LocalInvocationId +OpDecorate %25 BuiltIn LocalInvocationIndex +OpDecorate %76 BuiltIn LocalInvocationIndex %2 = OpTypeVoid %3 = OpTypeInt 32 0 %4 = OpTypeStruct %3 @@ -45,119 +47,157 @@ OpDecorate %47 BuiltIn LocalInvocationId %15 = OpVariable %16 TaskPayloadWorkgroupEXT %18 = OpTypePointer Workgroup %14 %17 = OpVariable %18 Workgroup -%21 = OpTypeFunction %2 +%21 = OpTypeFunction %8 %22 = OpConstantComposite %8 %13 %13 %13 -%24 = OpConstant %3 2 -%25 = OpConstant %3 264 -%31 = OpTypePointer Input %3 -%30 = OpVariable %31 Input -%35 = OpTypePointer Function %3 -%37 = OpTypeStruct %5 -%38 = OpTypeArray %37 %11 -%39 = OpTypePointer Output %38 -%40 = OpVariable %39 Output -%41 = OpTypeArray %8 %13 -%42 = OpTypePointer Output %41 -%43 = OpVariable %42 Output -%46 = OpConstantNull %14 -%48 = OpTypePointer Input %8 -%47 = OpVariable %48 Input -%50 = OpConstantNull %8 -%52 = OpTypeBool -%51 = OpTypeVector %52 3 -%59 = OpTypePointer Workgroup %3 -%66 = OpTypePointer Workgroup %10 -%67 = OpConstant %3 0 -%69 = OpTypePointer Workgroup %12 -%76 = OpTypePointer Workgroup %5 -%79 = OpTypePointer Output %5 -%89 = OpTypePointer Workgroup %8 -%92 = OpTypePointer Output %8 -%20 = OpFunction %2 None %21 +%24 = OpConstantNull %4 +%26 = OpTypePointer Input %3 +%25 = OpVariable %26 Input +%28 = OpConstant %3 0 +%30 = OpTypeBool +%33 = OpConstant %3 2 +%34 = OpConstant %3 264 +%37 = OpTypeFunction %2 +%41 = OpTypeInt 64 0 +%40 = OpConstant %41 256 +%42 = OpConstant %41 1024 +%64 = OpVariable %26 Input +%65 = OpConstant %3 64 +%66 = OpTypeStruct %5 +%67 = OpTypeArray %66 %11 +%68 = OpTypePointer Output %67 +%69 = OpVariable %68 Output +%70 = OpTypeArray %8 %13 +%71 = OpTypePointer Output %70 +%72 = OpVariable %71 Output +%75 = OpConstantNull %14 +%76 = OpVariable %26 Input +%86 = OpTypePointer Function %3 +%90 = OpTypePointer Workgroup %3 +%97 = OpTypePointer Workgroup %10 +%99 = OpTypePointer Workgroup %12 +%106 = OpTypePointer Workgroup %5 +%109 = OpTypePointer Output %5 +%119 = OpTypePointer Workgroup %8 +%122 = OpTypePointer Output %8 +%20 = OpFunction %8 None %21 %19 = OpLabel OpBranch %23 %23 = OpLabel -OpControlBarrier %24 %24 %25 -%26 = OpCompositeExtract %3 %22 0 -%27 = OpCompositeExtract %3 %22 1 -%28 = OpCompositeExtract %3 %22 2 -OpEmitMeshTasksEXT %26 %27 %28 %15 +%27 = OpLoad %3 %25 +%29 = OpIEqual %30 %27 %28 +OpSelectionMerge %31 None +OpBranchConditional %29 %32 %31 +%32 = OpLabel +OpStore %15 %24 +OpBranch %31 +%31 = OpLabel +OpControlBarrier %33 %33 %34 +OpBranch %35 +%35 = OpLabel +OpReturnValue %22 OpFunctionEnd -%44 = OpFunction %2 None %21 -%29 = OpLabel -%33 = OpVariable %35 Function -%34 = OpVariable %35 Function -%32 = OpLoad %3 %30 -OpBranch %45 -%45 = OpLabel -%49 = OpLoad %8 %47 -%53 = OpIEqual %51 %49 %50 -%54 = OpAll %52 %53 -OpSelectionMerge %55 None -OpBranchConditional %54 %56 %55 -%56 = OpLabel -OpStore %17 %46 -OpBranch %55 -%55 = OpLabel -OpControlBarrier %24 %24 %25 -OpBranch %57 -%57 = OpLabel -OpBranch %36 -%36 = OpLabel -OpControlBarrier %24 %24 %25 -%58 = OpAccessChain %59 %17 %24 -%60 = OpLoad %3 %58 -%61 = OpExtInst %3 %1 UMin %60 %11 -%62 = OpAccessChain %59 %17 %11 -%63 = OpLoad %3 %62 -%64 = OpExtInst %3 %1 UMin %63 %13 -%65 = OpAccessChain %66 %17 %67 -%68 = OpAccessChain %69 %17 %13 -OpControlBarrier %24 %24 %25 -OpSetMeshOutputsEXT %61 %64 -OpStore %33 %32 -OpBranch %70 -%70 = OpLabel -OpLoopMerge %72 %81 None -OpBranch %80 +%36 = OpFunction %2 None %37 +%38 = OpLabel +%39 = OpFunctionCall %8 %20 +OpControlBarrier %33 %33 %34 +%46 = OpCompositeExtract %3 %39 0 +%43 = OpUConvert %41 %46 +%47 = OpCompositeExtract %3 %39 1 +%44 = OpUConvert %41 %47 +%48 = OpCompositeExtract %3 %39 2 +%45 = OpUConvert %41 %48 +%49 = OpIMul %41 %43 %44 +%50 = OpIMul %41 %49 %45 +%51 = OpUGreaterThanEqual %30 %50 %42 +%52 = OpUGreaterThanEqual %30 %43 %40 +%53 = OpUGreaterThanEqual %30 %44 %40 +%54 = OpUGreaterThanEqual %30 %45 %40 +%55 = OpLogicalOr %30 %51 %52 +%56 = OpLogicalOr %30 %55 %53 +%57 = OpLogicalOr %30 %56 %54 +%58 = OpCompositeConstruct %8 %28 %28 %28 +%59 = OpSelect %8 %57 %58 %39 +%60 = OpCompositeExtract %3 %59 0 +%61 = OpCompositeExtract %3 %59 1 +%62 = OpCompositeExtract %3 %59 2 +OpEmitMeshTasksEXT %60 %61 %62 %15 +OpFunctionEnd +%73 = OpFunction %2 None %37 +%63 = OpLabel +OpBranch %74 +%74 = OpLabel +%77 = OpLoad %3 %76 +%78 = OpIEqual %30 %77 %28 +OpSelectionMerge %79 None +OpBranchConditional %78 %80 %79 %80 = OpLabel -%83 = OpLoad %3 %33 -%84 = OpULessThan %52 %83 %61 -OpBranchConditional %84 %82 %72 -%82 = OpLabel -%74 = OpLoad %3 %33 -%75 = OpAccessChain %76 %65 %74 %67 -%77 = OpLoad %5 %75 -%78 = OpAccessChain %79 %40 %74 %67 -OpStore %78 %77 +OpStore %17 %75 +OpBranch %79 +%79 = OpLabel +OpControlBarrier %33 %33 %34 OpBranch %81 %81 = OpLabel -%85 = OpLoad %3 %33 -%86 = OpIAdd %3 %85 %13 -OpStore %33 %86 -OpBranch %70 -%72 = OpLabel -OpStore %34 %32 -OpBranch %71 -%71 = OpLabel -OpLoopMerge %73 %94 None -OpBranch %93 -%93 = OpLabel -%96 = OpLoad %3 %34 -%97 = OpULessThan %52 %96 %64 -OpBranchConditional %97 %95 %73 -%95 = OpLabel -%87 = OpLoad %3 %34 -%88 = OpAccessChain %89 %68 %87 %67 -%90 = OpLoad %8 %88 -%91 = OpAccessChain %92 %43 %87 -OpStore %91 %90 -OpBranch %94 -%94 = OpLabel -%98 = OpLoad %3 %34 -%99 = OpIAdd %3 %98 %13 -OpStore %34 %99 -OpBranch %71 -%73 = OpLabel +OpReturn +OpFunctionEnd +%82 = OpFunction %2 None %37 +%83 = OpLabel +%84 = OpVariable %86 Function +%85 = OpVariable %86 Function +%87 = OpLoad %3 %64 +%88 = OpFunctionCall %2 %73 +OpControlBarrier %33 %33 %34 +%89 = OpAccessChain %90 %17 %33 +%91 = OpLoad %3 %89 +%92 = OpExtInst %3 %1 UMin %91 %11 +%93 = OpAccessChain %90 %17 %11 +%94 = OpLoad %3 %93 +%95 = OpExtInst %3 %1 UMin %94 %13 +%96 = OpAccessChain %97 %17 %28 +%98 = OpAccessChain %99 %17 %13 +OpSetMeshOutputsEXT %92 %95 +OpStore %84 %87 +OpBranch %100 +%100 = OpLabel +OpLoopMerge %102 %111 None +OpBranch %110 +%110 = OpLabel +%113 = OpLoad %3 %84 +%114 = OpULessThan %30 %113 %92 +OpBranchConditional %114 %112 %102 +%112 = OpLabel +%104 = OpLoad %3 %84 +%105 = OpAccessChain %106 %96 %104 %28 +%107 = OpLoad %5 %105 +%108 = OpAccessChain %109 %69 %104 %28 +OpStore %108 %107 +OpBranch %111 +%111 = OpLabel +%115 = OpLoad %3 %84 +%116 = OpIAdd %3 %115 %65 +OpStore %84 %116 +OpBranch %100 +%102 = OpLabel +OpStore %85 %87 +OpBranch %101 +%101 = OpLabel +OpLoopMerge %103 %124 None +OpBranch %123 +%123 = OpLabel +%126 = OpLoad %3 %85 +%127 = OpULessThan %30 %126 %95 +OpBranchConditional %127 %125 %103 +%125 = OpLabel +%117 = OpLoad %3 %85 +%118 = OpAccessChain %119 %98 %117 %28 +%120 = OpLoad %8 %118 +%121 = OpAccessChain %122 %72 %117 +OpStore %121 %120 +OpBranch %124 +%124 = OpLabel +%128 = OpLoad %3 %85 +%129 = OpIAdd %3 %128 %65 +OpStore %85 %129 +OpBranch %101 +%103 = OpLabel OpReturn OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/wgsl-mesh-shader-lines.spvasm b/naga/tests/out/spv/wgsl-mesh-shader-lines.spvasm index a03e4e4d78..89e42e1fd3 100644 --- a/naga/tests/out/spv/wgsl-mesh-shader-lines.spvasm +++ b/naga/tests/out/spv/wgsl-mesh-shader-lines.spvasm @@ -1,23 +1,24 @@ ; SPIR-V ; Version: 1.4 ; Generator: rspirv -; Bound: 101 +; Bound: 131 OpCapability Shader OpCapability MeshShadingEXT +OpCapability Int64 OpExtension "SPV_EXT_mesh_shader" %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 -OpEntryPoint TaskEXT %21 "ts_main" %16 -OpEntryPoint MeshEXT %44 "ms_main" %16 %30 %40 %43 %18 %47 -OpExecutionMode %21 LocalSize 1 1 1 -OpExecutionMode %44 LocalSize 1 1 1 -OpExecutionMode %44 OutputLinesNV -OpExecutionMode %44 OutputVertices 2 -OpExecutionMode %44 OutputPrimitivesNV 1 -OpDecorate %30 BuiltIn LocalInvocationIndex -OpMemberDecorate %37 0 BuiltIn Position -OpDecorate %37 Block -OpDecorate %43 BuiltIn PrimitiveLineIndicesEXT +OpEntryPoint TaskEXT %36 "ts_main" %16 %26 +OpEntryPoint MeshEXT %82 "ms_main" %16 %64 %69 %72 %18 %76 +OpExecutionMode %36 LocalSize 64 1 1 +OpExecutionMode %82 LocalSize 64 1 1 +OpExecutionMode %82 OutputLinesNV +OpExecutionMode %82 OutputVertices 2 +OpExecutionMode %82 OutputPrimitivesNV 1 +OpDecorate %64 BuiltIn LocalInvocationIndex +OpMemberDecorate %66 0 BuiltIn Position +OpDecorate %66 Block +OpDecorate %72 BuiltIn PrimitiveLineIndicesEXT OpMemberDecorate %4 0 Offset 0 OpMemberDecorate %7 0 Offset 0 OpMemberDecorate %9 0 Offset 0 @@ -27,7 +28,8 @@ OpMemberDecorate %15 0 Offset 0 OpMemberDecorate %15 1 Offset 32 OpMemberDecorate %15 2 Offset 40 OpMemberDecorate %15 3 Offset 44 -OpDecorate %47 BuiltIn LocalInvocationId +OpDecorate %26 BuiltIn LocalInvocationIndex +OpDecorate %76 BuiltIn LocalInvocationIndex %2 = OpTypeVoid %3 = OpTypeInt 32 0 %4 = OpTypeStruct %3 @@ -46,119 +48,157 @@ OpDecorate %47 BuiltIn LocalInvocationId %16 = OpVariable %17 TaskPayloadWorkgroupEXT %19 = OpTypePointer Workgroup %15 %18 = OpVariable %19 Workgroup -%22 = OpTypeFunction %2 +%22 = OpTypeFunction %10 %23 = OpConstantComposite %10 %14 %14 %14 -%25 = OpConstant %3 264 -%31 = OpTypePointer Input %3 -%30 = OpVariable %31 Input -%35 = OpTypePointer Function %3 -%37 = OpTypeStruct %5 -%38 = OpTypeArray %37 %12 -%39 = OpTypePointer Output %38 -%40 = OpVariable %39 Output -%41 = OpTypeArray %8 %14 -%42 = OpTypePointer Output %41 -%43 = OpVariable %42 Output -%46 = OpConstantNull %15 -%48 = OpTypePointer Input %10 -%47 = OpVariable %48 Input -%50 = OpConstantNull %10 -%52 = OpTypeBool -%51 = OpTypeVector %52 3 -%59 = OpTypePointer Workgroup %3 -%63 = OpConstant %3 3 -%67 = OpTypePointer Workgroup %11 -%68 = OpConstant %3 0 -%70 = OpTypePointer Workgroup %13 -%77 = OpTypePointer Workgroup %5 -%80 = OpTypePointer Output %5 -%90 = OpTypePointer Workgroup %8 -%93 = OpTypePointer Output %8 -%21 = OpFunction %2 None %22 +%25 = OpConstantNull %4 +%27 = OpTypePointer Input %3 +%26 = OpVariable %27 Input +%29 = OpConstant %3 0 +%31 = OpTypeBool +%34 = OpConstant %3 264 +%37 = OpTypeFunction %2 +%41 = OpTypeInt 64 0 +%40 = OpConstant %41 256 +%42 = OpConstant %41 1024 +%64 = OpVariable %27 Input +%65 = OpConstant %3 64 +%66 = OpTypeStruct %5 +%67 = OpTypeArray %66 %12 +%68 = OpTypePointer Output %67 +%69 = OpVariable %68 Output +%70 = OpTypeArray %8 %14 +%71 = OpTypePointer Output %70 +%72 = OpVariable %71 Output +%75 = OpConstantNull %15 +%76 = OpVariable %27 Input +%86 = OpTypePointer Function %3 +%90 = OpTypePointer Workgroup %3 +%94 = OpConstant %3 3 +%98 = OpTypePointer Workgroup %11 +%100 = OpTypePointer Workgroup %13 +%107 = OpTypePointer Workgroup %5 +%110 = OpTypePointer Output %5 +%120 = OpTypePointer Workgroup %8 +%123 = OpTypePointer Output %8 +%21 = OpFunction %10 None %22 %20 = OpLabel OpBranch %24 %24 = OpLabel -OpControlBarrier %12 %12 %25 -%26 = OpCompositeExtract %3 %23 0 -%27 = OpCompositeExtract %3 %23 1 -%28 = OpCompositeExtract %3 %23 2 -OpEmitMeshTasksEXT %26 %27 %28 %16 +%28 = OpLoad %3 %26 +%30 = OpIEqual %31 %28 %29 +OpSelectionMerge %32 None +OpBranchConditional %30 %33 %32 +%33 = OpLabel +OpStore %16 %25 +OpBranch %32 +%32 = OpLabel +OpControlBarrier %12 %12 %34 +OpBranch %35 +%35 = OpLabel +OpReturnValue %23 OpFunctionEnd -%44 = OpFunction %2 None %22 -%29 = OpLabel -%33 = OpVariable %35 Function -%34 = OpVariable %35 Function -%32 = OpLoad %3 %30 -OpBranch %45 -%45 = OpLabel -%49 = OpLoad %10 %47 -%53 = OpIEqual %51 %49 %50 -%54 = OpAll %52 %53 -OpSelectionMerge %55 None -OpBranchConditional %54 %56 %55 -%56 = OpLabel -OpStore %18 %46 -OpBranch %55 -%55 = OpLabel -OpControlBarrier %12 %12 %25 -OpBranch %57 -%57 = OpLabel -OpBranch %36 -%36 = OpLabel -OpControlBarrier %12 %12 %25 -%58 = OpAccessChain %59 %18 %12 -%60 = OpLoad %3 %58 -%61 = OpExtInst %3 %1 UMin %60 %12 -%62 = OpAccessChain %59 %18 %63 -%64 = OpLoad %3 %62 -%65 = OpExtInst %3 %1 UMin %64 %14 -%66 = OpAccessChain %67 %18 %68 -%69 = OpAccessChain %70 %18 %14 -OpControlBarrier %12 %12 %25 -OpSetMeshOutputsEXT %61 %65 -OpStore %33 %32 -OpBranch %71 -%71 = OpLabel -OpLoopMerge %73 %82 None +%36 = OpFunction %2 None %37 +%38 = OpLabel +%39 = OpFunctionCall %10 %21 +OpControlBarrier %12 %12 %34 +%46 = OpCompositeExtract %3 %39 0 +%43 = OpUConvert %41 %46 +%47 = OpCompositeExtract %3 %39 1 +%44 = OpUConvert %41 %47 +%48 = OpCompositeExtract %3 %39 2 +%45 = OpUConvert %41 %48 +%49 = OpIMul %41 %43 %44 +%50 = OpIMul %41 %49 %45 +%51 = OpUGreaterThanEqual %31 %50 %42 +%52 = OpUGreaterThanEqual %31 %43 %40 +%53 = OpUGreaterThanEqual %31 %44 %40 +%54 = OpUGreaterThanEqual %31 %45 %40 +%55 = OpLogicalOr %31 %51 %52 +%56 = OpLogicalOr %31 %55 %53 +%57 = OpLogicalOr %31 %56 %54 +%58 = OpCompositeConstruct %10 %29 %29 %29 +%59 = OpSelect %10 %57 %58 %39 +%60 = OpCompositeExtract %3 %59 0 +%61 = OpCompositeExtract %3 %59 1 +%62 = OpCompositeExtract %3 %59 2 +OpEmitMeshTasksEXT %60 %61 %62 %16 +OpFunctionEnd +%73 = OpFunction %2 None %37 +%63 = OpLabel +OpBranch %74 +%74 = OpLabel +%77 = OpLoad %3 %76 +%78 = OpIEqual %31 %77 %29 +OpSelectionMerge %79 None +OpBranchConditional %78 %80 %79 +%80 = OpLabel +OpStore %18 %75 +OpBranch %79 +%79 = OpLabel +OpControlBarrier %12 %12 %34 OpBranch %81 %81 = OpLabel -%84 = OpLoad %3 %33 -%85 = OpULessThan %52 %84 %61 -OpBranchConditional %85 %83 %73 +OpReturn +OpFunctionEnd +%82 = OpFunction %2 None %37 %83 = OpLabel -%75 = OpLoad %3 %33 -%76 = OpAccessChain %77 %66 %75 %68 -%78 = OpLoad %5 %76 -%79 = OpAccessChain %80 %40 %75 %68 -OpStore %79 %78 -OpBranch %82 -%82 = OpLabel -%86 = OpLoad %3 %33 -%87 = OpIAdd %3 %86 %14 -OpStore %33 %87 -OpBranch %71 -%73 = OpLabel -OpStore %34 %32 -OpBranch %72 -%72 = OpLabel -OpLoopMerge %74 %95 None -OpBranch %94 -%94 = OpLabel -%97 = OpLoad %3 %34 -%98 = OpULessThan %52 %97 %65 -OpBranchConditional %98 %96 %74 -%96 = OpLabel -%88 = OpLoad %3 %34 -%89 = OpAccessChain %90 %69 %88 %68 -%91 = OpLoad %8 %89 -%92 = OpAccessChain %93 %43 %88 -OpStore %92 %91 -OpBranch %95 -%95 = OpLabel -%99 = OpLoad %3 %34 -%100 = OpIAdd %3 %99 %14 -OpStore %34 %100 -OpBranch %72 -%74 = OpLabel +%84 = OpVariable %86 Function +%85 = OpVariable %86 Function +%87 = OpLoad %3 %64 +%88 = OpFunctionCall %2 %73 +OpControlBarrier %12 %12 %34 +%89 = OpAccessChain %90 %18 %12 +%91 = OpLoad %3 %89 +%92 = OpExtInst %3 %1 UMin %91 %12 +%93 = OpAccessChain %90 %18 %94 +%95 = OpLoad %3 %93 +%96 = OpExtInst %3 %1 UMin %95 %14 +%97 = OpAccessChain %98 %18 %29 +%99 = OpAccessChain %100 %18 %14 +OpSetMeshOutputsEXT %92 %96 +OpStore %84 %87 +OpBranch %101 +%101 = OpLabel +OpLoopMerge %103 %112 None +OpBranch %111 +%111 = OpLabel +%114 = OpLoad %3 %84 +%115 = OpULessThan %31 %114 %92 +OpBranchConditional %115 %113 %103 +%113 = OpLabel +%105 = OpLoad %3 %84 +%106 = OpAccessChain %107 %97 %105 %29 +%108 = OpLoad %5 %106 +%109 = OpAccessChain %110 %69 %105 %29 +OpStore %109 %108 +OpBranch %112 +%112 = OpLabel +%116 = OpLoad %3 %84 +%117 = OpIAdd %3 %116 %65 +OpStore %84 %117 +OpBranch %101 +%103 = OpLabel +OpStore %85 %87 +OpBranch %102 +%102 = OpLabel +OpLoopMerge %104 %125 None +OpBranch %124 +%124 = OpLabel +%127 = OpLoad %3 %85 +%128 = OpULessThan %31 %127 %96 +OpBranchConditional %128 %126 %104 +%126 = OpLabel +%118 = OpLoad %3 %85 +%119 = OpAccessChain %120 %99 %118 %29 +%121 = OpLoad %8 %119 +%122 = OpAccessChain %123 %72 %118 +OpStore %122 %121 +OpBranch %125 +%125 = OpLabel +%129 = OpLoad %3 %85 +%130 = OpIAdd %3 %129 %65 +OpStore %85 %130 +OpBranch %102 +%104 = OpLabel OpReturn OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/wgsl-mesh-shader-points.spvasm b/naga/tests/out/spv/wgsl-mesh-shader-points.spvasm index 1f7ba638d2..5cb2c537f1 100644 --- a/naga/tests/out/spv/wgsl-mesh-shader-points.spvasm +++ b/naga/tests/out/spv/wgsl-mesh-shader-points.spvasm @@ -1,23 +1,24 @@ ; SPIR-V ; Version: 1.4 ; Generator: rspirv -; Bound: 99 +; Bound: 129 OpCapability Shader OpCapability MeshShadingEXT +OpCapability Int64 OpExtension "SPV_EXT_mesh_shader" %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 -OpEntryPoint TaskEXT %19 "ts_main" %14 -OpEntryPoint MeshEXT %43 "ms_main" %14 %29 %39 %42 %16 %46 -OpExecutionMode %19 LocalSize 1 1 1 -OpExecutionMode %43 LocalSize 1 1 1 -OpExecutionMode %43 OutputPoints -OpExecutionMode %43 OutputVertices 1 -OpExecutionMode %43 OutputPrimitivesNV 1 -OpDecorate %29 BuiltIn LocalInvocationIndex -OpMemberDecorate %36 0 BuiltIn Position -OpDecorate %36 Block -OpDecorate %42 BuiltIn PrimitivePointIndicesEXT +OpEntryPoint TaskEXT %35 "ts_main" %14 %24 +OpEntryPoint MeshEXT %81 "ms_main" %14 %63 %68 %71 %16 %75 +OpExecutionMode %35 LocalSize 64 1 1 +OpExecutionMode %81 LocalSize 64 1 1 +OpExecutionMode %81 OutputPoints +OpExecutionMode %81 OutputVertices 1 +OpExecutionMode %81 OutputPrimitivesNV 1 +OpDecorate %63 BuiltIn LocalInvocationIndex +OpMemberDecorate %65 0 BuiltIn Position +OpDecorate %65 Block +OpDecorate %71 BuiltIn PrimitivePointIndicesEXT OpMemberDecorate %4 0 Offset 0 OpMemberDecorate %7 0 Offset 0 OpMemberDecorate %8 0 Offset 0 @@ -27,7 +28,8 @@ OpMemberDecorate %13 0 Offset 0 OpMemberDecorate %13 1 Offset 16 OpMemberDecorate %13 2 Offset 20 OpMemberDecorate %13 3 Offset 24 -OpDecorate %46 BuiltIn LocalInvocationId +OpDecorate %24 BuiltIn LocalInvocationIndex +OpDecorate %75 BuiltIn LocalInvocationIndex %2 = OpTypeVoid %3 = OpTypeInt 32 0 %4 = OpTypeStruct %3 @@ -44,119 +46,157 @@ OpDecorate %46 BuiltIn LocalInvocationId %14 = OpVariable %15 TaskPayloadWorkgroupEXT %17 = OpTypePointer Workgroup %13 %16 = OpVariable %17 Workgroup -%20 = OpTypeFunction %2 +%20 = OpTypeFunction %9 %21 = OpConstantComposite %9 %11 %11 %11 -%23 = OpConstant %3 2 -%24 = OpConstant %3 264 -%30 = OpTypePointer Input %3 -%29 = OpVariable %30 Input -%34 = OpTypePointer Function %3 -%36 = OpTypeStruct %5 -%37 = OpTypeArray %36 %11 -%38 = OpTypePointer Output %37 -%39 = OpVariable %38 Output -%40 = OpTypeArray %3 %11 -%41 = OpTypePointer Output %40 -%42 = OpVariable %41 Output -%45 = OpConstantNull %13 -%47 = OpTypePointer Input %9 -%46 = OpVariable %47 Input -%49 = OpConstantNull %9 -%51 = OpTypeBool -%50 = OpTypeVector %51 3 -%58 = OpTypePointer Workgroup %3 -%62 = OpConstant %3 3 -%66 = OpTypePointer Workgroup %10 -%67 = OpConstant %3 0 -%69 = OpTypePointer Workgroup %12 -%76 = OpTypePointer Workgroup %5 -%79 = OpTypePointer Output %5 -%91 = OpTypePointer Output %3 -%19 = OpFunction %2 None %20 +%23 = OpConstantNull %4 +%25 = OpTypePointer Input %3 +%24 = OpVariable %25 Input +%27 = OpConstant %3 0 +%29 = OpTypeBool +%32 = OpConstant %3 2 +%33 = OpConstant %3 264 +%36 = OpTypeFunction %2 +%40 = OpTypeInt 64 0 +%39 = OpConstant %40 256 +%41 = OpConstant %40 1024 +%63 = OpVariable %25 Input +%64 = OpConstant %3 64 +%65 = OpTypeStruct %5 +%66 = OpTypeArray %65 %11 +%67 = OpTypePointer Output %66 +%68 = OpVariable %67 Output +%69 = OpTypeArray %3 %11 +%70 = OpTypePointer Output %69 +%71 = OpVariable %70 Output +%74 = OpConstantNull %13 +%75 = OpVariable %25 Input +%85 = OpTypePointer Function %3 +%89 = OpTypePointer Workgroup %3 +%93 = OpConstant %3 3 +%97 = OpTypePointer Workgroup %10 +%99 = OpTypePointer Workgroup %12 +%106 = OpTypePointer Workgroup %5 +%109 = OpTypePointer Output %5 +%121 = OpTypePointer Output %3 +%19 = OpFunction %9 None %20 %18 = OpLabel OpBranch %22 %22 = OpLabel -OpControlBarrier %23 %23 %24 -%25 = OpCompositeExtract %3 %21 0 -%26 = OpCompositeExtract %3 %21 1 -%27 = OpCompositeExtract %3 %21 2 -OpEmitMeshTasksEXT %25 %26 %27 %14 +%26 = OpLoad %3 %24 +%28 = OpIEqual %29 %26 %27 +OpSelectionMerge %30 None +OpBranchConditional %28 %31 %30 +%31 = OpLabel +OpStore %14 %23 +OpBranch %30 +%30 = OpLabel +OpControlBarrier %32 %32 %33 +OpBranch %34 +%34 = OpLabel +OpReturnValue %21 OpFunctionEnd -%43 = OpFunction %2 None %20 -%28 = OpLabel -%32 = OpVariable %34 Function -%33 = OpVariable %34 Function -%31 = OpLoad %3 %29 -OpBranch %44 -%44 = OpLabel -%48 = OpLoad %9 %46 -%52 = OpIEqual %50 %48 %49 -%53 = OpAll %51 %52 -OpSelectionMerge %54 None -OpBranchConditional %53 %55 %54 -%55 = OpLabel -OpStore %16 %45 -OpBranch %54 -%54 = OpLabel -OpControlBarrier %23 %23 %24 -OpBranch %56 -%56 = OpLabel -OpBranch %35 -%35 = OpLabel -OpControlBarrier %23 %23 %24 -%57 = OpAccessChain %58 %16 %23 -%59 = OpLoad %3 %57 -%60 = OpExtInst %3 %1 UMin %59 %11 -%61 = OpAccessChain %58 %16 %62 -%63 = OpLoad %3 %61 -%64 = OpExtInst %3 %1 UMin %63 %11 -%65 = OpAccessChain %66 %16 %67 -%68 = OpAccessChain %69 %16 %11 -OpControlBarrier %23 %23 %24 -OpSetMeshOutputsEXT %60 %64 -OpStore %32 %31 -OpBranch %70 -%70 = OpLabel -OpLoopMerge %72 %81 None +%35 = OpFunction %2 None %36 +%37 = OpLabel +%38 = OpFunctionCall %9 %19 +OpControlBarrier %32 %32 %33 +%45 = OpCompositeExtract %3 %38 0 +%42 = OpUConvert %40 %45 +%46 = OpCompositeExtract %3 %38 1 +%43 = OpUConvert %40 %46 +%47 = OpCompositeExtract %3 %38 2 +%44 = OpUConvert %40 %47 +%48 = OpIMul %40 %42 %43 +%49 = OpIMul %40 %48 %44 +%50 = OpUGreaterThanEqual %29 %49 %41 +%51 = OpUGreaterThanEqual %29 %42 %39 +%52 = OpUGreaterThanEqual %29 %43 %39 +%53 = OpUGreaterThanEqual %29 %44 %39 +%54 = OpLogicalOr %29 %50 %51 +%55 = OpLogicalOr %29 %54 %52 +%56 = OpLogicalOr %29 %55 %53 +%57 = OpCompositeConstruct %9 %27 %27 %27 +%58 = OpSelect %9 %56 %57 %38 +%59 = OpCompositeExtract %3 %58 0 +%60 = OpCompositeExtract %3 %58 1 +%61 = OpCompositeExtract %3 %58 2 +OpEmitMeshTasksEXT %59 %60 %61 %14 +OpFunctionEnd +%72 = OpFunction %2 None %36 +%62 = OpLabel +OpBranch %73 +%73 = OpLabel +%76 = OpLoad %3 %75 +%77 = OpIEqual %29 %76 %27 +OpSelectionMerge %78 None +OpBranchConditional %77 %79 %78 +%79 = OpLabel +OpStore %16 %74 +OpBranch %78 +%78 = OpLabel +OpControlBarrier %32 %32 %33 OpBranch %80 %80 = OpLabel -%83 = OpLoad %3 %32 -%84 = OpULessThan %51 %83 %60 -OpBranchConditional %84 %82 %72 +OpReturn +OpFunctionEnd +%81 = OpFunction %2 None %36 %82 = OpLabel -%74 = OpLoad %3 %32 -%75 = OpAccessChain %76 %65 %74 %67 -%77 = OpLoad %5 %75 -%78 = OpAccessChain %79 %39 %74 %67 -OpStore %78 %77 -OpBranch %81 -%81 = OpLabel -%85 = OpLoad %3 %32 -%86 = OpIAdd %3 %85 %11 -OpStore %32 %86 -OpBranch %70 -%72 = OpLabel -OpStore %33 %31 -OpBranch %71 -%71 = OpLabel -OpLoopMerge %73 %93 None -OpBranch %92 -%92 = OpLabel -%95 = OpLoad %3 %33 -%96 = OpULessThan %51 %95 %64 -OpBranchConditional %96 %94 %73 -%94 = OpLabel -%87 = OpLoad %3 %33 -%88 = OpAccessChain %58 %68 %87 %67 -%89 = OpLoad %3 %88 -%90 = OpAccessChain %91 %42 %87 -OpStore %90 %89 -OpBranch %93 -%93 = OpLabel -%97 = OpLoad %3 %33 -%98 = OpIAdd %3 %97 %11 -OpStore %33 %98 -OpBranch %71 -%73 = OpLabel +%83 = OpVariable %85 Function +%84 = OpVariable %85 Function +%86 = OpLoad %3 %63 +%87 = OpFunctionCall %2 %72 +OpControlBarrier %32 %32 %33 +%88 = OpAccessChain %89 %16 %32 +%90 = OpLoad %3 %88 +%91 = OpExtInst %3 %1 UMin %90 %11 +%92 = OpAccessChain %89 %16 %93 +%94 = OpLoad %3 %92 +%95 = OpExtInst %3 %1 UMin %94 %11 +%96 = OpAccessChain %97 %16 %27 +%98 = OpAccessChain %99 %16 %11 +OpSetMeshOutputsEXT %91 %95 +OpStore %83 %86 +OpBranch %100 +%100 = OpLabel +OpLoopMerge %102 %111 None +OpBranch %110 +%110 = OpLabel +%113 = OpLoad %3 %83 +%114 = OpULessThan %29 %113 %91 +OpBranchConditional %114 %112 %102 +%112 = OpLabel +%104 = OpLoad %3 %83 +%105 = OpAccessChain %106 %96 %104 %27 +%107 = OpLoad %5 %105 +%108 = OpAccessChain %109 %68 %104 %27 +OpStore %108 %107 +OpBranch %111 +%111 = OpLabel +%115 = OpLoad %3 %83 +%116 = OpIAdd %3 %115 %64 +OpStore %83 %116 +OpBranch %100 +%102 = OpLabel +OpStore %84 %86 +OpBranch %101 +%101 = OpLabel +OpLoopMerge %103 %123 None +OpBranch %122 +%122 = OpLabel +%125 = OpLoad %3 %84 +%126 = OpULessThan %29 %125 %95 +OpBranchConditional %126 %124 %103 +%124 = OpLabel +%117 = OpLoad %3 %84 +%118 = OpAccessChain %89 %98 %117 %27 +%119 = OpLoad %3 %118 +%120 = OpAccessChain %121 %71 %117 +OpStore %120 %119 +OpBranch %123 +%123 = OpLabel +%127 = OpLoad %3 %84 +%128 = OpIAdd %3 %127 %64 +OpStore %84 %128 +OpBranch %101 +%103 = OpLabel OpReturn OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/wgsl-mesh-shader.spvasm b/naga/tests/out/spv/wgsl-mesh-shader.spvasm index fde357c5c7..ee5a533ac6 100644 --- a/naga/tests/out/spv/wgsl-mesh-shader.spvasm +++ b/naga/tests/out/spv/wgsl-mesh-shader.spvasm @@ -1,65 +1,49 @@ ; SPIR-V ; Version: 1.4 ; Generator: rspirv -; Bound: 384 +; Bound: 336 OpCapability Shader OpCapability MeshShadingEXT +OpCapability Int64 OpExtension "SPV_EXT_mesh_shader" %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 -OpEntryPoint TaskEXT %24 "ts_main" %17 %19 %33 -OpEntryPoint TaskEXT %57 "ts_divergent" %54 %17 -OpEntryPoint MeshEXT %95 "ms_main" %17 %72 %81 %85 %88 %91 %94 %19 %21 %107 -OpEntryPoint MeshEXT %214 "ms_no_ts" %192 %200 %204 %207 %210 %213 %19 %21 %217 -OpEntryPoint MeshEXT %301 "ms_divergent" %279 %287 %291 %294 %297 %300 %19 %21 %303 -OpEntryPoint Fragment %379 "fs_main" %370 %373 %376 %378 -OpExecutionMode %24 LocalSize 1 1 1 -OpExecutionMode %57 LocalSize 2 1 1 -OpExecutionMode %95 LocalSize 1 1 1 -OpExecutionMode %95 OutputTrianglesNV -OpExecutionMode %95 OutputVertices 3 -OpExecutionMode %95 OutputPrimitivesNV 1 -OpExecutionMode %214 LocalSize 1 1 1 -OpExecutionMode %214 OutputTrianglesNV -OpExecutionMode %214 OutputVertices 3 -OpExecutionMode %214 OutputPrimitivesNV 1 -OpExecutionMode %301 LocalSize 1 1 1 -OpExecutionMode %301 OutputTrianglesNV -OpExecutionMode %301 OutputVertices 3 -OpExecutionMode %301 OutputPrimitivesNV 1 -OpExecutionMode %379 OriginUpperLeft -OpDecorate %72 BuiltIn LocalInvocationIndex -OpMemberDecorate %78 0 BuiltIn Position -OpDecorate %78 Block -OpMemberDecorate %82 0 BuiltIn CullPrimitiveEXT -OpMemberDecorate %82 0 PerPrimitiveNV -OpDecorate %82 Block -OpDecorate %85 PerPrimitiveNV -OpDecorate %88 Location 0 -OpDecorate %91 BuiltIn PrimitiveTriangleIndicesEXT -OpDecorate %94 Location 1 -OpDecorate %94 PerPrimitiveNV -OpDecorate %192 BuiltIn LocalInvocationIndex -OpMemberDecorate %197 0 BuiltIn Position -OpDecorate %197 Block -OpMemberDecorate %201 0 BuiltIn CullPrimitiveEXT -OpMemberDecorate %201 0 PerPrimitiveNV -OpDecorate %201 Block -OpDecorate %204 PerPrimitiveNV -OpDecorate %207 Location 0 -OpDecorate %210 BuiltIn PrimitiveTriangleIndicesEXT -OpDecorate %213 Location 1 -OpDecorate %213 PerPrimitiveNV -OpMemberDecorate %284 0 BuiltIn Position -OpDecorate %284 Block -OpMemberDecorate %288 0 BuiltIn CullPrimitiveEXT -OpMemberDecorate %288 0 PerPrimitiveNV -OpDecorate %288 Block -OpDecorate %291 PerPrimitiveNV -OpDecorate %294 Location 0 -OpDecorate %297 BuiltIn PrimitiveTriangleIndicesEXT -OpDecorate %300 Location 1 -OpDecorate %300 PerPrimitiveNV +OpEntryPoint TaskEXT %69 "ts_main" %37 %17 %51 +OpEntryPoint MeshEXT %172 "ms_main" %97 %17 %99 %104 %108 %111 %114 %117 %19 %21 %131 +OpEntryPoint MeshEXT %271 "ms_no_ts" %226 %228 %232 %236 %239 %242 %245 %19 %21 %249 +OpEntryPoint Fragment %331 "fs_main" %322 %325 %328 %330 +OpExecutionMode %69 LocalSize 64 1 1 +OpExecutionMode %172 LocalSize 64 1 1 +OpExecutionMode %172 OutputTrianglesNV +OpExecutionMode %172 OutputVertices 3 +OpExecutionMode %172 OutputPrimitivesNV 1 +OpExecutionMode %271 LocalSize 64 1 1 +OpExecutionMode %271 OutputTrianglesNV +OpExecutionMode %271 OutputVertices 3 +OpExecutionMode %271 OutputPrimitivesNV 1 +OpExecutionMode %331 OriginUpperLeft +OpDecorate %99 BuiltIn LocalInvocationIndex +OpMemberDecorate %101 0 BuiltIn Position +OpDecorate %101 Block +OpMemberDecorate %105 0 BuiltIn CullPrimitiveEXT +OpMemberDecorate %105 0 PerPrimitiveNV +OpDecorate %105 Block +OpDecorate %108 PerPrimitiveNV +OpDecorate %111 Location 0 +OpDecorate %114 BuiltIn PrimitiveTriangleIndicesEXT +OpDecorate %117 Location 1 +OpDecorate %117 PerPrimitiveNV +OpDecorate %228 BuiltIn LocalInvocationIndex +OpMemberDecorate %229 0 BuiltIn Position +OpDecorate %229 Block +OpMemberDecorate %233 0 BuiltIn CullPrimitiveEXT +OpMemberDecorate %233 0 PerPrimitiveNV +OpDecorate %233 Block +OpDecorate %236 PerPrimitiveNV +OpDecorate %239 Location 0 +OpDecorate %242 BuiltIn PrimitiveTriangleIndicesEXT +OpDecorate %245 Location 1 +OpDecorate %245 PerPrimitiveNV OpMemberDecorate %6 0 Offset 0 OpMemberDecorate %6 1 Offset 16 OpMemberDecorate %7 0 Offset 0 @@ -74,17 +58,17 @@ OpMemberDecorate %16 0 Offset 0 OpMemberDecorate %16 1 Offset 96 OpMemberDecorate %16 2 Offset 128 OpMemberDecorate %16 3 Offset 132 -OpDecorate %33 BuiltIn LocalInvocationId -OpDecorate %54 BuiltIn LocalInvocationIndex -OpDecorate %107 BuiltIn LocalInvocationId -OpDecorate %217 BuiltIn LocalInvocationId -OpDecorate %279 BuiltIn LocalInvocationIndex -OpDecorate %303 BuiltIn LocalInvocationId -OpDecorate %370 BuiltIn FragCoord -OpDecorate %373 Location 0 -OpDecorate %376 Location 1 -OpDecorate %376 PerPrimitiveNV -OpDecorate %378 Location 0 +OpDecorate %37 BuiltIn LocalInvocationId +OpDecorate %51 BuiltIn LocalInvocationIndex +OpDecorate %97 BuiltIn LocalInvocationId +OpDecorate %131 BuiltIn LocalInvocationIndex +OpDecorate %226 BuiltIn LocalInvocationId +OpDecorate %249 BuiltIn LocalInvocationIndex +OpDecorate %322 BuiltIn FragCoord +OpDecorate %325 Location 0 +OpDecorate %328 Location 1 +OpDecorate %328 PerPrimitiveNV +OpDecorate %330 Location 0 %2 = OpTypeVoid %3 = OpTypeFloat 32 %4 = OpTypeVector %3 4 @@ -106,536 +90,446 @@ OpDecorate %378 Location 0 %19 = OpVariable %20 Workgroup %22 = OpTypePointer Workgroup %16 %21 = OpVariable %22 Workgroup -%25 = OpTypeFunction %2 -%26 = OpConstant %3 1 -%27 = OpConstant %3 0 -%28 = OpConstantComposite %4 %26 %26 %27 %26 -%29 = OpConstantTrue %5 -%30 = OpConstantComposite %9 %15 %15 %15 -%32 = OpConstantNull %3 -%34 = OpTypePointer Input %9 -%33 = OpVariable %34 Input -%36 = OpConstantNull %9 -%37 = OpTypeVector %5 3 -%42 = OpConstant %8 2 -%43 = OpConstant %8 264 -%45 = OpTypePointer TaskPayloadWorkgroupEXT %4 -%46 = OpConstant %8 0 -%48 = OpTypePointer TaskPayloadWorkgroupEXT %5 -%55 = OpTypePointer Input %8 -%54 = OpVariable %55 Input -%58 = OpConstantComposite %9 %42 %42 %42 -%72 = OpVariable %55 Input -%76 = OpTypePointer Function %8 -%78 = OpTypeStruct %4 -%79 = OpTypeArray %78 %13 -%80 = OpTypePointer Output %79 -%81 = OpVariable %80 Output -%82 = OpTypeStruct %5 -%83 = OpTypeArray %82 %15 -%84 = OpTypePointer Output %83 -%85 = OpVariable %84 Output -%86 = OpTypeArray %4 %13 -%87 = OpTypePointer Output %86 -%88 = OpVariable %87 Output -%89 = OpTypeArray %9 %15 -%90 = OpTypePointer Output %89 -%91 = OpVariable %90 Output -%92 = OpTypeArray %4 %15 -%93 = OpTypePointer Output %92 -%94 = OpVariable %93 Output -%96 = OpConstant %3 2 -%97 = OpConstantComposite %4 %27 %26 %27 %26 -%98 = OpConstant %3 -1 -%99 = OpConstantComposite %4 %98 %98 %27 %26 -%100 = OpConstantComposite %4 %27 %27 %26 %26 -%101 = OpConstantComposite %4 %26 %98 %27 %26 -%102 = OpConstantComposite %4 %26 %27 %27 %26 -%103 = OpConstantComposite %9 %46 %15 %42 -%104 = OpConstantComposite %4 %26 %27 %26 %26 -%106 = OpConstantNull %16 -%107 = OpVariable %34 Input -%114 = OpTypePointer Workgroup %8 -%117 = OpTypePointer Workgroup %12 -%118 = OpTypePointer Workgroup %7 -%119 = OpTypePointer Workgroup %4 -%135 = OpTypePointer Workgroup %14 -%136 = OpTypePointer Workgroup %10 -%137 = OpTypePointer Workgroup %9 -%139 = OpTypePointer Workgroup %5 -%161 = OpTypePointer Output %4 -%176 = OpTypePointer Output %9 -%180 = OpTypePointer Output %5 -%192 = OpVariable %55 Input -%197 = OpTypeStruct %4 -%198 = OpTypeArray %197 %13 -%199 = OpTypePointer Output %198 -%200 = OpVariable %199 Output -%201 = OpTypeStruct %5 -%202 = OpTypeArray %201 %15 -%203 = OpTypePointer Output %202 -%204 = OpVariable %203 Output -%205 = OpTypeArray %4 %13 -%206 = OpTypePointer Output %205 -%207 = OpVariable %206 Output -%208 = OpTypeArray %9 %15 -%209 = OpTypePointer Output %208 -%210 = OpVariable %209 Output -%211 = OpTypeArray %4 %15 -%212 = OpTypePointer Output %211 -%213 = OpVariable %212 Output -%215 = OpConstantFalse %5 -%217 = OpVariable %34 Input -%279 = OpVariable %55 Input -%284 = OpTypeStruct %4 -%285 = OpTypeArray %284 %13 -%286 = OpTypePointer Output %285 -%287 = OpVariable %286 Output -%288 = OpTypeStruct %5 -%289 = OpTypeArray %288 %15 -%290 = OpTypePointer Output %289 -%291 = OpVariable %290 Output -%292 = OpTypeArray %4 %13 -%293 = OpTypePointer Output %292 -%294 = OpVariable %293 Output -%295 = OpTypeArray %9 %15 -%296 = OpTypePointer Output %295 -%297 = OpVariable %296 Output -%298 = OpTypeArray %4 %15 -%299 = OpTypePointer Output %298 -%300 = OpVariable %299 Output -%303 = OpVariable %34 Input -%371 = OpTypePointer Input %4 -%370 = OpVariable %371 Input -%373 = OpVariable %371 Input -%376 = OpVariable %371 Input -%378 = OpVariable %161 Output -%24 = OpFunction %2 None %25 +%25 = OpTypeFunction %5 +%27 = OpTypePointer TaskPayloadWorkgroupEXT %5 +%33 = OpTypeFunction %2 %5 +%38 = OpTypePointer Input %9 +%37 = OpVariable %38 Input +%41 = OpTypeFunction %9 +%42 = OpConstant %8 0 +%43 = OpConstant %3 1 +%44 = OpConstant %3 0 +%45 = OpConstantComposite %4 %43 %43 %44 %43 +%46 = OpConstantTrue %5 +%47 = OpConstantComposite %9 %15 %15 %15 +%48 = OpConstantComposite %9 %42 %42 %42 +%50 = OpConstantNull %6 +%52 = OpTypePointer Input %8 +%51 = OpVariable %52 Input +%57 = OpConstant %8 2 +%58 = OpConstant %8 264 +%64 = OpTypePointer TaskPayloadWorkgroupEXT %4 +%70 = OpTypeFunction %2 +%74 = OpTypeInt 64 0 +%73 = OpConstant %74 256 +%75 = OpConstant %74 1024 +%97 = OpVariable %38 Input +%99 = OpVariable %52 Input +%100 = OpConstant %8 64 +%101 = OpTypeStruct %4 +%102 = OpTypeArray %101 %13 +%103 = OpTypePointer Output %102 +%104 = OpVariable %103 Output +%105 = OpTypeStruct %5 +%106 = OpTypeArray %105 %15 +%107 = OpTypePointer Output %106 +%108 = OpVariable %107 Output +%109 = OpTypeArray %4 %13 +%110 = OpTypePointer Output %109 +%111 = OpVariable %110 Output +%112 = OpTypeArray %9 %15 +%113 = OpTypePointer Output %112 +%114 = OpVariable %113 Output +%115 = OpTypeArray %4 %15 +%116 = OpTypePointer Output %115 +%117 = OpVariable %116 Output +%119 = OpConstant %3 2 +%120 = OpConstantComposite %4 %44 %43 %44 %43 +%121 = OpConstant %3 -1 +%122 = OpConstantComposite %4 %121 %121 %44 %43 +%123 = OpConstantComposite %4 %44 %44 %43 %43 +%124 = OpConstantComposite %4 %43 %121 %44 %43 +%125 = OpConstantComposite %4 %43 %44 %44 %43 +%126 = OpConstantComposite %9 %42 %15 %57 +%127 = OpConstantComposite %4 %43 %44 %43 %43 +%129 = OpConstantNull %3 +%130 = OpConstantNull %16 +%131 = OpVariable %52 Input +%142 = OpTypePointer Workgroup %8 +%145 = OpTypePointer Workgroup %12 +%146 = OpTypePointer Workgroup %7 +%147 = OpTypePointer Workgroup %4 +%163 = OpTypePointer Workgroup %14 +%164 = OpTypePointer Workgroup %10 +%165 = OpTypePointer Workgroup %9 +%167 = OpTypePointer Workgroup %5 +%176 = OpTypePointer Function %8 +%195 = OpTypePointer Output %4 +%210 = OpTypePointer Output %9 +%214 = OpTypePointer Output %5 +%226 = OpVariable %38 Input +%228 = OpVariable %52 Input +%229 = OpTypeStruct %4 +%230 = OpTypeArray %229 %13 +%231 = OpTypePointer Output %230 +%232 = OpVariable %231 Output +%233 = OpTypeStruct %5 +%234 = OpTypeArray %233 %15 +%235 = OpTypePointer Output %234 +%236 = OpVariable %235 Output +%237 = OpTypeArray %4 %13 +%238 = OpTypePointer Output %237 +%239 = OpVariable %238 Output +%240 = OpTypeArray %9 %15 +%241 = OpTypePointer Output %240 +%242 = OpVariable %241 Output +%243 = OpTypeArray %4 %15 +%244 = OpTypePointer Output %243 +%245 = OpVariable %244 Output +%247 = OpConstantFalse %5 +%249 = OpVariable %52 Input +%323 = OpTypePointer Input %4 +%322 = OpVariable %323 Input +%325 = OpVariable %323 Input +%328 = OpVariable %323 Input +%330 = OpVariable %195 Output +%24 = OpFunction %5 None %25 %23 = OpLabel -OpBranch %31 -%31 = OpLabel -%35 = OpLoad %9 %33 -%38 = OpIEqual %37 %35 %36 -%39 = OpAll %5 %38 -OpSelectionMerge %40 None -OpBranchConditional %39 %41 %40 -%41 = OpLabel -OpStore %19 %32 -OpBranch %40 -%40 = OpLabel -OpControlBarrier %42 %42 %43 -OpBranch %44 -%44 = OpLabel -OpStore %19 %26 -%47 = OpAccessChain %45 %17 %46 -OpStore %47 %28 -%49 = OpAccessChain %48 %17 %15 -OpStore %49 %29 -OpControlBarrier %42 %42 %43 -%50 = OpCompositeExtract %8 %30 0 -%51 = OpCompositeExtract %8 %30 1 -%52 = OpCompositeExtract %8 %30 2 -OpEmitMeshTasksEXT %50 %51 %52 %17 +OpBranch %26 +%26 = OpLabel +%28 = OpAccessChain %27 %17 %15 +%29 = OpLoad %5 %28 +OpReturnValue %29 OpFunctionEnd -%57 = OpFunction %2 None %25 -%53 = OpLabel -%56 = OpLoad %8 %54 +%32 = OpFunction %2 None %33 +%31 = OpFunctionParameter %5 +%30 = OpLabel +OpBranch %34 +%34 = OpLabel +%35 = OpAccessChain %27 %17 %15 +OpStore %35 %31 +OpReturn +OpFunctionEnd +%40 = OpFunction %9 None %41 +%36 = OpLabel +%39 = OpLoad %9 %37 +OpBranch %49 +%49 = OpLabel +%53 = OpLoad %8 %51 +%54 = OpIEqual %5 %53 %42 +OpSelectionMerge %55 None +OpBranchConditional %54 %56 %55 +%56 = OpLabel +OpStore %17 %50 +OpBranch %55 +%55 = OpLabel +OpControlBarrier %57 %57 %58 OpBranch %59 %59 = OpLabel -%60 = OpIEqual %5 %56 %46 -OpSelectionMerge %61 None -OpBranchConditional %60 %62 %61 +%60 = OpCompositeExtract %8 %39 0 +%61 = OpIEqual %5 %60 %42 +OpSelectionMerge %62 None +OpBranchConditional %61 %63 %62 +%63 = OpLabel +%65 = OpAccessChain %64 %17 %42 +OpStore %65 %45 +%66 = OpFunctionCall %2 %32 %46 +%67 = OpFunctionCall %5 %24 +%68 = OpAccessChain %27 %17 %15 +OpStore %68 %67 +OpReturnValue %47 %62 = OpLabel -%63 = OpAccessChain %45 %17 %46 -OpStore %63 %28 -%64 = OpAccessChain %48 %17 %15 -OpStore %64 %29 -OpControlBarrier %42 %42 %43 -%65 = OpCompositeExtract %8 %30 0 -%66 = OpCompositeExtract %8 %30 1 -%67 = OpCompositeExtract %8 %30 2 -OpEmitMeshTasksEXT %65 %66 %67 %17 -%61 = OpLabel -OpControlBarrier %42 %42 %43 -%68 = OpCompositeExtract %8 %58 0 -%69 = OpCompositeExtract %8 %58 1 -%70 = OpCompositeExtract %8 %58 2 -OpEmitMeshTasksEXT %68 %69 %70 %17 +OpReturnValue %48 OpFunctionEnd -%95 = OpFunction %2 None %25 +%69 = OpFunction %2 None %70 %71 = OpLabel -%74 = OpVariable %76 Function -%75 = OpVariable %76 Function -%73 = OpLoad %8 %72 -OpBranch %105 -%105 = OpLabel -%108 = OpLoad %9 %107 -%109 = OpIEqual %37 %108 %36 -%110 = OpAll %5 %109 -OpSelectionMerge %111 None -OpBranchConditional %110 %112 %111 -%112 = OpLabel -OpStore %19 %32 -OpStore %21 %106 -OpBranch %111 -%111 = OpLabel -OpControlBarrier %42 %42 %43 -OpBranch %113 -%113 = OpLabel -%115 = OpAccessChain %114 %21 %42 -OpStore %115 %13 -%116 = OpAccessChain %114 %21 %13 -OpStore %116 %15 -OpStore %19 %96 -%120 = OpAccessChain %119 %21 %46 %46 %46 -OpStore %120 %97 -%121 = OpAccessChain %45 %17 %46 -%122 = OpLoad %4 %121 -%123 = OpFMul %4 %97 %122 -%124 = OpAccessChain %119 %21 %46 %46 %15 -OpStore %124 %123 -%125 = OpAccessChain %119 %21 %46 %15 %46 -OpStore %125 %99 -%126 = OpAccessChain %45 %17 %46 -%127 = OpLoad %4 %126 -%128 = OpFMul %4 %100 %127 -%129 = OpAccessChain %119 %21 %46 %15 %15 -OpStore %129 %128 -%130 = OpAccessChain %119 %21 %46 %42 %46 -OpStore %130 %101 -%131 = OpAccessChain %45 %17 %46 -%132 = OpLoad %4 %131 -%133 = OpFMul %4 %102 %132 -%134 = OpAccessChain %119 %21 %46 %42 %15 -OpStore %134 %133 -%138 = OpAccessChain %137 %21 %15 %46 %46 -OpStore %138 %103 -%140 = OpAccessChain %48 %17 %15 -%141 = OpLoad %5 %140 -%142 = OpLogicalNot %5 %141 -%143 = OpAccessChain %139 %21 %15 %46 %15 -OpStore %143 %142 -%144 = OpAccessChain %119 %21 %15 %46 %42 -OpStore %144 %104 -OpBranch %77 -%77 = OpLabel -OpControlBarrier %42 %42 %43 -%145 = OpAccessChain %114 %21 %42 -%146 = OpLoad %8 %145 -%147 = OpExtInst %8 %1 UMin %146 %13 -%148 = OpAccessChain %114 %21 %13 -%149 = OpLoad %8 %148 -%150 = OpExtInst %8 %1 UMin %149 %15 -%151 = OpAccessChain %117 %21 %46 -%152 = OpAccessChain %135 %21 %15 -OpControlBarrier %42 %42 %43 -OpSetMeshOutputsEXT %147 %150 -OpStore %74 %73 -OpBranch %153 -%153 = OpLabel -OpLoopMerge %155 %166 None -OpBranch %165 -%165 = OpLabel -%168 = OpLoad %8 %74 -%169 = OpULessThan %5 %168 %147 -OpBranchConditional %169 %167 %155 -%167 = OpLabel -%157 = OpLoad %8 %74 -%158 = OpAccessChain %119 %151 %157 %46 -%159 = OpLoad %4 %158 -%160 = OpAccessChain %161 %81 %157 %46 -OpStore %160 %159 -%162 = OpAccessChain %119 %151 %157 %15 -%163 = OpLoad %4 %162 -%164 = OpAccessChain %161 %88 %157 -OpStore %164 %163 -OpBranch %166 -%166 = OpLabel -%170 = OpLoad %8 %74 -%171 = OpIAdd %8 %170 %15 -OpStore %74 %171 -OpBranch %153 -%155 = OpLabel -OpStore %75 %73 -OpBranch %154 -%154 = OpLabel -OpLoopMerge %156 %185 None -OpBranch %184 -%184 = OpLabel -%187 = OpLoad %8 %75 -%188 = OpULessThan %5 %187 %150 -OpBranchConditional %188 %186 %156 -%186 = OpLabel -%172 = OpLoad %8 %75 -%173 = OpAccessChain %137 %152 %172 %46 -%174 = OpLoad %9 %173 -%175 = OpAccessChain %176 %91 %172 -OpStore %175 %174 -%177 = OpAccessChain %139 %152 %172 %15 -%178 = OpLoad %5 %177 -%179 = OpAccessChain %180 %85 %172 %46 -OpStore %179 %178 -%181 = OpAccessChain %119 %152 %172 %42 -%182 = OpLoad %4 %181 -%183 = OpAccessChain %161 %94 %172 -OpStore %183 %182 -OpBranch %185 -%185 = OpLabel -%189 = OpLoad %8 %75 -%190 = OpIAdd %8 %189 %15 -OpStore %75 %190 -OpBranch %154 -%156 = OpLabel +%72 = OpFunctionCall %9 %40 +OpControlBarrier %57 %57 %58 +%79 = OpCompositeExtract %8 %72 0 +%76 = OpUConvert %74 %79 +%80 = OpCompositeExtract %8 %72 1 +%77 = OpUConvert %74 %80 +%81 = OpCompositeExtract %8 %72 2 +%78 = OpUConvert %74 %81 +%82 = OpIMul %74 %76 %77 +%83 = OpIMul %74 %82 %78 +%84 = OpUGreaterThanEqual %5 %83 %75 +%85 = OpUGreaterThanEqual %5 %76 %73 +%86 = OpUGreaterThanEqual %5 %77 %73 +%87 = OpUGreaterThanEqual %5 %78 %73 +%88 = OpLogicalOr %5 %84 %85 +%89 = OpLogicalOr %5 %88 %86 +%90 = OpLogicalOr %5 %89 %87 +%91 = OpCompositeConstruct %9 %42 %42 %42 +%92 = OpSelect %9 %90 %91 %72 +%93 = OpCompositeExtract %8 %92 0 +%94 = OpCompositeExtract %8 %92 1 +%95 = OpCompositeExtract %8 %92 2 +OpEmitMeshTasksEXT %93 %94 %95 %17 +OpFunctionEnd +%118 = OpFunction %2 None %70 +%96 = OpLabel +%98 = OpLoad %9 %97 +OpBranch %128 +%128 = OpLabel +%132 = OpLoad %8 %131 +%133 = OpIEqual %5 %132 %42 +OpSelectionMerge %134 None +OpBranchConditional %133 %135 %134 +%135 = OpLabel +OpStore %19 %129 +OpStore %21 %130 +OpBranch %134 +%134 = OpLabel +OpControlBarrier %57 %57 %58 +OpBranch %136 +%136 = OpLabel +%137 = OpCompositeExtract %8 %98 0 +%138 = OpIEqual %5 %137 %42 +OpSelectionMerge %139 None +OpBranchConditional %138 %140 %141 +%140 = OpLabel +%143 = OpAccessChain %142 %21 %57 +OpStore %143 %13 +%144 = OpAccessChain %142 %21 %13 +OpStore %144 %15 +OpStore %19 %119 +%148 = OpAccessChain %147 %21 %42 %42 %42 +OpStore %148 %120 +%149 = OpAccessChain %64 %17 %42 +%150 = OpLoad %4 %149 +%151 = OpFMul %4 %120 %150 +%152 = OpAccessChain %147 %21 %42 %42 %15 +OpStore %152 %151 +%153 = OpAccessChain %147 %21 %42 %15 %42 +OpStore %153 %122 +%154 = OpAccessChain %64 %17 %42 +%155 = OpLoad %4 %154 +%156 = OpFMul %4 %123 %155 +%157 = OpAccessChain %147 %21 %42 %15 %15 +OpStore %157 %156 +%158 = OpAccessChain %147 %21 %42 %57 %42 +OpStore %158 %124 +%159 = OpAccessChain %64 %17 %42 +%160 = OpLoad %4 %159 +%161 = OpFMul %4 %125 %160 +%162 = OpAccessChain %147 %21 %42 %57 %15 +OpStore %162 %161 +%166 = OpAccessChain %165 %21 %15 %42 %42 +OpStore %166 %126 +%168 = OpFunctionCall %5 %24 +%169 = OpLogicalNot %5 %168 +%170 = OpAccessChain %167 %21 %15 %42 %15 +OpStore %170 %169 +%171 = OpAccessChain %147 %21 %15 %42 %57 +OpStore %171 %127 +OpReturn +%141 = OpLabel +OpReturn +%139 = OpLabel OpReturn OpFunctionEnd -%214 = OpFunction %2 None %25 -%191 = OpLabel -%194 = OpVariable %76 Function -%195 = OpVariable %76 Function -%193 = OpLoad %8 %192 -OpBranch %216 -%216 = OpLabel -%218 = OpLoad %9 %217 -%219 = OpIEqual %37 %218 %36 -%220 = OpAll %5 %219 -OpSelectionMerge %221 None -OpBranchConditional %220 %222 %221 -%222 = OpLabel -OpStore %19 %32 -OpStore %21 %106 -OpBranch %221 -%221 = OpLabel -OpControlBarrier %42 %42 %43 -OpBranch %223 -%223 = OpLabel -%224 = OpAccessChain %114 %21 %42 -OpStore %224 %13 -%225 = OpAccessChain %114 %21 %13 -OpStore %225 %15 -OpStore %19 %96 -%226 = OpAccessChain %119 %21 %46 %46 %46 -OpStore %226 %97 -%227 = OpAccessChain %119 %21 %46 %46 %15 -OpStore %227 %97 -%228 = OpAccessChain %119 %21 %46 %15 %46 -OpStore %228 %99 -%229 = OpAccessChain %119 %21 %46 %15 %15 -OpStore %229 %100 -%230 = OpAccessChain %119 %21 %46 %42 %46 -OpStore %230 %101 -%231 = OpAccessChain %119 %21 %46 %42 %15 -OpStore %231 %102 -%232 = OpAccessChain %137 %21 %15 %46 %46 -OpStore %232 %103 -%233 = OpAccessChain %139 %21 %15 %46 %15 -OpStore %233 %215 -%234 = OpAccessChain %119 %21 %15 %46 %42 -OpStore %234 %104 -OpBranch %196 -%196 = OpLabel -OpControlBarrier %42 %42 %43 -%235 = OpAccessChain %114 %21 %42 -%236 = OpLoad %8 %235 -%237 = OpExtInst %8 %1 UMin %236 %13 -%238 = OpAccessChain %114 %21 %13 -%239 = OpLoad %8 %238 -%240 = OpExtInst %8 %1 UMin %239 %15 -%241 = OpAccessChain %117 %21 %46 -%242 = OpAccessChain %135 %21 %15 -OpControlBarrier %42 %42 %43 -OpSetMeshOutputsEXT %237 %240 +%172 = OpFunction %2 None %70 +%173 = OpLabel +%174 = OpVariable %176 Function +%175 = OpVariable %176 Function +%177 = OpLoad %8 %99 +%178 = OpFunctionCall %2 %118 +OpControlBarrier %57 %57 %58 +%179 = OpAccessChain %142 %21 %57 +%180 = OpLoad %8 %179 +%181 = OpExtInst %8 %1 UMin %180 %13 +%182 = OpAccessChain %142 %21 %13 +%183 = OpLoad %8 %182 +%184 = OpExtInst %8 %1 UMin %183 %15 +%185 = OpAccessChain %145 %21 %42 +%186 = OpAccessChain %163 %21 %15 +OpSetMeshOutputsEXT %181 %184 +OpStore %174 %177 +OpBranch %187 +%187 = OpLabel +OpLoopMerge %189 %200 None +OpBranch %199 +%199 = OpLabel +%202 = OpLoad %8 %174 +%203 = OpULessThan %5 %202 %181 +OpBranchConditional %203 %201 %189 +%201 = OpLabel +%191 = OpLoad %8 %174 +%192 = OpAccessChain %147 %185 %191 %42 +%193 = OpLoad %4 %192 +%194 = OpAccessChain %195 %104 %191 %42 OpStore %194 %193 -OpBranch %243 -%243 = OpLabel -OpLoopMerge %245 %255 None +%196 = OpAccessChain %147 %185 %191 %15 +%197 = OpLoad %4 %196 +%198 = OpAccessChain %195 %111 %191 +OpStore %198 %197 +OpBranch %200 +%200 = OpLabel +%204 = OpLoad %8 %174 +%205 = OpIAdd %8 %204 %100 +OpStore %174 %205 +OpBranch %187 +%189 = OpLabel +OpStore %175 %177 +OpBranch %188 +%188 = OpLabel +OpLoopMerge %190 %219 None +OpBranch %218 +%218 = OpLabel +%221 = OpLoad %8 %175 +%222 = OpULessThan %5 %221 %184 +OpBranchConditional %222 %220 %190 +%220 = OpLabel +%206 = OpLoad %8 %175 +%207 = OpAccessChain %165 %186 %206 %42 +%208 = OpLoad %9 %207 +%209 = OpAccessChain %210 %114 %206 +OpStore %209 %208 +%211 = OpAccessChain %167 %186 %206 %15 +%212 = OpLoad %5 %211 +%213 = OpAccessChain %214 %108 %206 %42 +OpStore %213 %212 +%215 = OpAccessChain %147 %186 %206 %57 +%216 = OpLoad %4 %215 +%217 = OpAccessChain %195 %117 %206 +OpStore %217 %216 +OpBranch %219 +%219 = OpLabel +%223 = OpLoad %8 %175 +%224 = OpIAdd %8 %223 %100 +OpStore %175 %224 +OpBranch %188 +%190 = OpLabel +OpReturn +OpFunctionEnd +%246 = OpFunction %2 None %70 +%225 = OpLabel +%227 = OpLoad %9 %226 +OpBranch %248 +%248 = OpLabel +%250 = OpLoad %8 %249 +%251 = OpIEqual %5 %250 %42 +OpSelectionMerge %252 None +OpBranchConditional %251 %253 %252 +%253 = OpLabel +OpStore %19 %129 +OpStore %21 %130 +OpBranch %252 +%252 = OpLabel +OpControlBarrier %57 %57 %58 OpBranch %254 %254 = OpLabel -%257 = OpLoad %8 %194 -%258 = OpULessThan %5 %257 %237 -OpBranchConditional %258 %256 %245 -%256 = OpLabel -%247 = OpLoad %8 %194 -%248 = OpAccessChain %119 %241 %247 %46 -%249 = OpLoad %4 %248 -%250 = OpAccessChain %161 %200 %247 %46 -OpStore %250 %249 -%251 = OpAccessChain %119 %241 %247 %15 -%252 = OpLoad %4 %251 -%253 = OpAccessChain %161 %207 %247 -OpStore %253 %252 -OpBranch %255 -%255 = OpLabel -%259 = OpLoad %8 %194 -%260 = OpIAdd %8 %259 %15 -OpStore %194 %260 -OpBranch %243 -%245 = OpLabel -OpStore %195 %193 -OpBranch %244 -%244 = OpLabel -OpLoopMerge %246 %272 None -OpBranch %271 -%271 = OpLabel -%274 = OpLoad %8 %195 -%275 = OpULessThan %5 %274 %240 -OpBranchConditional %275 %273 %246 -%273 = OpLabel -%261 = OpLoad %8 %195 -%262 = OpAccessChain %137 %242 %261 %46 -%263 = OpLoad %9 %262 -%264 = OpAccessChain %176 %210 %261 -OpStore %264 %263 -%265 = OpAccessChain %139 %242 %261 %15 -%266 = OpLoad %5 %265 -%267 = OpAccessChain %180 %204 %261 %46 -OpStore %267 %266 -%268 = OpAccessChain %119 %242 %261 %42 -%269 = OpLoad %4 %268 -%270 = OpAccessChain %161 %213 %261 -OpStore %270 %269 -OpBranch %272 -%272 = OpLabel -%276 = OpLoad %8 %195 -%277 = OpIAdd %8 %276 %15 -OpStore %195 %277 -OpBranch %244 -%246 = OpLabel +%255 = OpCompositeExtract %8 %227 0 +%256 = OpIEqual %5 %255 %42 +OpSelectionMerge %257 None +OpBranchConditional %256 %258 %259 +%258 = OpLabel +%260 = OpAccessChain %142 %21 %57 +OpStore %260 %13 +%261 = OpAccessChain %142 %21 %13 +OpStore %261 %15 +OpStore %19 %119 +%262 = OpAccessChain %147 %21 %42 %42 %42 +OpStore %262 %120 +%263 = OpAccessChain %147 %21 %42 %42 %15 +OpStore %263 %120 +%264 = OpAccessChain %147 %21 %42 %15 %42 +OpStore %264 %122 +%265 = OpAccessChain %147 %21 %42 %15 %15 +OpStore %265 %123 +%266 = OpAccessChain %147 %21 %42 %57 %42 +OpStore %266 %124 +%267 = OpAccessChain %147 %21 %42 %57 %15 +OpStore %267 %125 +%268 = OpAccessChain %165 %21 %15 %42 %42 +OpStore %268 %126 +%269 = OpAccessChain %167 %21 %15 %42 %15 +OpStore %269 %247 +%270 = OpAccessChain %147 %21 %15 %42 %57 +OpStore %270 %127 +OpReturn +%259 = OpLabel +OpReturn +%257 = OpLabel OpReturn OpFunctionEnd -%301 = OpFunction %2 None %25 -%278 = OpLabel -%281 = OpVariable %76 Function -%282 = OpVariable %76 Function -%280 = OpLoad %8 %279 -OpBranch %302 -%302 = OpLabel -%304 = OpLoad %9 %303 -%305 = OpIEqual %37 %304 %36 -%306 = OpAll %5 %305 -OpSelectionMerge %307 None -OpBranchConditional %306 %308 %307 -%308 = OpLabel -OpStore %19 %32 -OpStore %21 %106 -OpBranch %307 -%307 = OpLabel -OpControlBarrier %42 %42 %43 -OpBranch %309 -%309 = OpLabel -%310 = OpIEqual %5 %280 %46 -OpSelectionMerge %311 None -OpBranchConditional %310 %312 %313 -%312 = OpLabel -%314 = OpAccessChain %114 %21 %42 -OpStore %314 %13 -%315 = OpAccessChain %114 %21 %13 -OpStore %315 %15 -OpStore %19 %96 -%316 = OpAccessChain %119 %21 %46 %46 %46 -OpStore %316 %97 -%317 = OpAccessChain %119 %21 %46 %46 %15 -OpStore %317 %97 -%318 = OpAccessChain %119 %21 %46 %15 %46 -OpStore %318 %99 -%319 = OpAccessChain %119 %21 %46 %15 %15 -OpStore %319 %100 -%320 = OpAccessChain %119 %21 %46 %42 %46 -OpStore %320 %101 -%321 = OpAccessChain %119 %21 %46 %42 %15 -OpStore %321 %102 -%322 = OpAccessChain %137 %21 %15 %46 %46 -OpStore %322 %103 -%323 = OpAccessChain %139 %21 %15 %46 %15 -OpStore %323 %215 -%324 = OpAccessChain %119 %21 %15 %46 %42 -OpStore %324 %104 -OpBranch %283 +%271 = OpFunction %2 None %70 +%272 = OpLabel +%273 = OpVariable %176 Function +%274 = OpVariable %176 Function +%275 = OpLoad %8 %228 +%276 = OpFunctionCall %2 %246 +OpControlBarrier %57 %57 %58 +%277 = OpAccessChain %142 %21 %57 +%278 = OpLoad %8 %277 +%279 = OpExtInst %8 %1 UMin %278 %13 +%280 = OpAccessChain %142 %21 %13 +%281 = OpLoad %8 %280 +%282 = OpExtInst %8 %1 UMin %281 %15 +%283 = OpAccessChain %145 %21 %42 +%284 = OpAccessChain %163 %21 %15 +OpSetMeshOutputsEXT %279 %282 +OpStore %273 %275 +OpBranch %285 +%285 = OpLabel +OpLoopMerge %287 %297 None +OpBranch %296 +%296 = OpLabel +%299 = OpLoad %8 %273 +%300 = OpULessThan %5 %299 %279 +OpBranchConditional %300 %298 %287 +%298 = OpLabel +%289 = OpLoad %8 %273 +%290 = OpAccessChain %147 %283 %289 %42 +%291 = OpLoad %4 %290 +%292 = OpAccessChain %195 %232 %289 %42 +OpStore %292 %291 +%293 = OpAccessChain %147 %283 %289 %15 +%294 = OpLoad %4 %293 +%295 = OpAccessChain %195 %239 %289 +OpStore %295 %294 +OpBranch %297 +%297 = OpLabel +%301 = OpLoad %8 %273 +%302 = OpIAdd %8 %301 %100 +OpStore %273 %302 +OpBranch %285 +%287 = OpLabel +OpStore %274 %275 +OpBranch %286 +%286 = OpLabel +OpLoopMerge %288 %314 None +OpBranch %313 %313 = OpLabel -OpBranch %283 -%311 = OpLabel -OpReturn -%283 = OpLabel -OpControlBarrier %42 %42 %43 -%325 = OpAccessChain %114 %21 %42 -%326 = OpLoad %8 %325 -%327 = OpExtInst %8 %1 UMin %326 %13 -%328 = OpAccessChain %114 %21 %13 -%329 = OpLoad %8 %328 -%330 = OpExtInst %8 %1 UMin %329 %15 -%331 = OpAccessChain %117 %21 %46 -%332 = OpAccessChain %135 %21 %15 -OpControlBarrier %42 %42 %43 -OpSetMeshOutputsEXT %327 %330 -OpStore %281 %280 -OpBranch %333 -%333 = OpLabel -OpLoopMerge %335 %345 None -OpBranch %344 -%344 = OpLabel -%347 = OpLoad %8 %281 -%348 = OpULessThan %5 %347 %327 -OpBranchConditional %348 %346 %335 -%346 = OpLabel -%337 = OpLoad %8 %281 -%338 = OpAccessChain %119 %331 %337 %46 -%339 = OpLoad %4 %338 -%340 = OpAccessChain %161 %287 %337 %46 -OpStore %340 %339 -%341 = OpAccessChain %119 %331 %337 %15 -%342 = OpLoad %4 %341 -%343 = OpAccessChain %161 %294 %337 -OpStore %343 %342 -OpBranch %345 -%345 = OpLabel -%349 = OpLoad %8 %281 -%350 = OpIAdd %8 %349 %15 -OpStore %281 %350 -OpBranch %333 -%335 = OpLabel -OpStore %282 %280 -OpBranch %334 -%334 = OpLabel -OpLoopMerge %336 %362 None -OpBranch %361 -%361 = OpLabel -%364 = OpLoad %8 %282 -%365 = OpULessThan %5 %364 %330 -OpBranchConditional %365 %363 %336 -%363 = OpLabel -%351 = OpLoad %8 %282 -%352 = OpAccessChain %137 %332 %351 %46 -%353 = OpLoad %9 %352 -%354 = OpAccessChain %176 %297 %351 -OpStore %354 %353 -%355 = OpAccessChain %139 %332 %351 %15 -%356 = OpLoad %5 %355 -%357 = OpAccessChain %180 %291 %351 %46 -OpStore %357 %356 -%358 = OpAccessChain %119 %332 %351 %42 -%359 = OpLoad %4 %358 -%360 = OpAccessChain %161 %300 %351 -OpStore %360 %359 -OpBranch %362 -%362 = OpLabel -%366 = OpLoad %8 %282 -%367 = OpIAdd %8 %366 %15 -OpStore %282 %367 -OpBranch %334 -%336 = OpLabel +%316 = OpLoad %8 %274 +%317 = OpULessThan %5 %316 %282 +OpBranchConditional %317 %315 %288 +%315 = OpLabel +%303 = OpLoad %8 %274 +%304 = OpAccessChain %165 %284 %303 %42 +%305 = OpLoad %9 %304 +%306 = OpAccessChain %210 %242 %303 +OpStore %306 %305 +%307 = OpAccessChain %167 %284 %303 %15 +%308 = OpLoad %5 %307 +%309 = OpAccessChain %214 %236 %303 %42 +OpStore %309 %308 +%310 = OpAccessChain %147 %284 %303 %57 +%311 = OpLoad %4 %310 +%312 = OpAccessChain %195 %245 %303 +OpStore %312 %311 +OpBranch %314 +%314 = OpLabel +%318 = OpLoad %8 %274 +%319 = OpIAdd %8 %318 %100 +OpStore %274 %319 +OpBranch %286 +%288 = OpLabel OpReturn OpFunctionEnd -%379 = OpFunction %2 None %25 -%368 = OpLabel -%372 = OpLoad %4 %370 -%374 = OpLoad %4 %373 -%369 = OpCompositeConstruct %7 %372 %374 -%377 = OpLoad %4 %376 -%375 = OpCompositeConstruct %11 %377 -OpBranch %380 -%380 = OpLabel -%381 = OpCompositeExtract %4 %369 1 -%382 = OpCompositeExtract %4 %375 0 -%383 = OpFMul %4 %381 %382 -OpStore %378 %383 +%331 = OpFunction %2 None %70 +%320 = OpLabel +%324 = OpLoad %4 %322 +%326 = OpLoad %4 %325 +%321 = OpCompositeConstruct %7 %324 %326 +%329 = OpLoad %4 %328 +%327 = OpCompositeConstruct %11 %329 +OpBranch %332 +%332 = OpLabel +%333 = OpCompositeExtract %4 %321 1 +%334 = OpCompositeExtract %4 %327 0 +%335 = OpFMul %4 %333 %334 +OpStore %330 %335 OpReturn OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/wgsl-overrides-atomicCompareExchangeWeak.f.spvasm b/naga/tests/out/spv/wgsl-overrides-atomicCompareExchangeWeak.f.spvasm index 45e71af552..f32cd756b1 100644 --- a/naga/tests/out/spv/wgsl-overrides-atomicCompareExchangeWeak.f.spvasm +++ b/naga/tests/out/spv/wgsl-overrides-atomicCompareExchangeWeak.f.spvasm @@ -1,15 +1,15 @@ ; SPIR-V ; Version: 1.0 ; Generator: rspirv -; Bound: 33 +; Bound: 29 OpCapability Shader %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 -OpEntryPoint GLCompute %11 "f" %18 +OpEntryPoint GLCompute %11 "f" %17 OpExecutionMode %11 LocalSize 1 1 1 OpMemberDecorate %6 0 Offset 0 OpMemberDecorate %6 1 Offset 4 -OpDecorate %18 BuiltIn LocalInvocationId +OpDecorate %17 BuiltIn LocalInvocationIndex %2 = OpTypeVoid %3 = OpTypeInt 32 1 %4 = OpTypeInt 32 0 @@ -22,31 +22,27 @@ OpDecorate %18 BuiltIn LocalInvocationId %13 = OpConstant %4 2 %14 = OpConstant %4 1 %16 = OpConstantNull %4 -%17 = OpTypeVector %4 3 -%19 = OpTypePointer Input %17 -%18 = OpVariable %19 Input -%21 = OpConstantNull %17 -%22 = OpTypeVector %5 3 -%27 = OpConstant %4 264 -%30 = OpConstant %4 0 +%18 = OpTypePointer Input %4 +%17 = OpVariable %18 Input +%20 = OpConstant %4 0 +%24 = OpConstant %4 264 %11 = OpFunction %2 None %12 %10 = OpLabel OpBranch %15 %15 = OpLabel -%20 = OpLoad %17 %18 -%23 = OpIEqual %22 %20 %21 -%24 = OpAll %5 %23 -OpSelectionMerge %25 None -OpBranchConditional %24 %26 %25 -%26 = OpLabel +%19 = OpLoad %4 %17 +%21 = OpIEqual %5 %19 %20 +OpSelectionMerge %22 None +OpBranchConditional %21 %23 %22 +%23 = OpLabel OpStore %8 %16 +OpBranch %22 +%22 = OpLabel +OpControlBarrier %13 %13 %24 OpBranch %25 %25 = OpLabel -OpControlBarrier %13 %13 %27 -OpBranch %28 -%28 = OpLabel -%31 = OpAtomicCompareExchange %4 %8 %7 %30 %30 %14 %13 -%32 = OpIEqual %5 %31 %13 -%29 = OpCompositeConstruct %6 %31 %32 +%27 = OpAtomicCompareExchange %4 %8 %7 %20 %20 %14 %13 +%28 = OpIEqual %5 %27 %13 +%26 = OpCompositeConstruct %6 %27 %28 OpReturn OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/wgsl-policy-mix.spvasm b/naga/tests/out/spv/wgsl-policy-mix.spvasm index 8d9209d7b3..23f3313349 100644 --- a/naga/tests/out/spv/wgsl-policy-mix.spvasm +++ b/naga/tests/out/spv/wgsl-policy-mix.spvasm @@ -1,13 +1,13 @@ ; SPIR-V ; Version: 1.1 ; Generator: rspirv -; Bound: 126 +; Bound: 123 OpCapability Shader OpCapability ImageQuery OpExtension "SPV_KHR_storage_buffer_storage_class" %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 -OpEntryPoint GLCompute %102 "main" %115 +OpEntryPoint GLCompute %102 "main" %114 OpExecutionMode %102 LocalSize 1 1 1 %3 = OpString "policy-mix.wgsl" OpSource Unknown 0 %3 "// Tests that the index, buffer, and texture bounds checks policies are @@ -82,7 +82,7 @@ OpDecorate %26 Block OpMemberDecorate %26 0 Offset 0 OpDecorate %28 DescriptorSet 0 OpDecorate %28 Binding 2 -OpDecorate %115 BuiltIn LocalInvocationId +OpDecorate %114 BuiltIn LocalInvocationIndex %2 = OpTypeVoid %4 = OpTypeFloat 32 %5 = OpTypeVector %4 4 @@ -147,11 +147,9 @@ OpDecorate %115 BuiltIn LocalInvocationId %110 = OpConstant %18 3 %111 = OpConstant %18 4 %113 = OpConstantNull %14 -%114 = OpTypeVector %8 3 -%116 = OpTypePointer Input %114 -%115 = OpVariable %116 Input -%118 = OpConstantNull %114 -%123 = OpConstant %8 264 +%115 = OpTypePointer Input %8 +%114 = OpVariable %115 Input +%120 = OpConstant %8 264 %39 = OpFunction %5 None %40 %36 = OpFunctionParameter %19 %37 = OpFunctionParameter %18 @@ -214,21 +212,20 @@ OpFunctionEnd %106 = OpLoad %13 %28 OpBranch %112 %112 = OpLabel -%117 = OpLoad %114 %115 -%119 = OpIEqual %74 %117 %118 -%120 = OpAll %67 %119 -OpSelectionMerge %121 None -OpBranchConditional %120 %122 %121 -%122 = OpLabel +%116 = OpLoad %8 %114 +%117 = OpIEqual %67 %116 %42 +OpSelectionMerge %118 None +OpBranchConditional %117 %119 %118 +%119 = OpLabel OpStore %30 %113 +OpBranch %118 +%118 = OpLabel +OpControlBarrier %21 %21 %120 OpBranch %121 %121 = OpLabel -OpControlBarrier %21 %21 %123 -OpBranch %124 -%124 = OpLabel OpLine %3 37 19 OpLine %3 37 19 OpLine %3 37 5 -%125 = OpFunctionCall %5 %39 %109 %110 %111 +%122 = OpFunctionCall %5 %39 %109 %110 %111 OpReturn OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/wgsl-workgroup-uniform-load.spvasm b/naga/tests/out/spv/wgsl-workgroup-uniform-load.spvasm index c2d0de7a04..e82a9a70ba 100644 --- a/naga/tests/out/spv/wgsl-workgroup-uniform-load.spvasm +++ b/naga/tests/out/spv/wgsl-workgroup-uniform-load.spvasm @@ -1,7 +1,7 @@ ; SPIR-V ; Version: 1.1 ; Generator: rspirv -; Bound: 39 +; Bound: 38 OpCapability Shader %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 @@ -9,7 +9,7 @@ OpEntryPoint GLCompute %14 "test_workgroupUniformLoad" %11 %19 OpExecutionMode %14 LocalSize 4 1 1 OpDecorate %5 ArrayStride 4 OpDecorate %11 BuiltIn WorkgroupId -OpDecorate %19 BuiltIn LocalInvocationId +OpDecorate %19 BuiltIn LocalInvocationIndex %2 = OpTypeVoid %3 = OpTypeInt 32 0 %4 = OpTypeInt 32 1 @@ -23,43 +23,42 @@ OpDecorate %19 BuiltIn LocalInvocationId %15 = OpTypeFunction %2 %16 = OpConstant %4 10 %18 = OpConstantNull %5 -%19 = OpVariable %12 Input -%21 = OpConstantNull %7 -%23 = OpTypeBool -%22 = OpTypeVector %23 3 -%28 = OpConstant %3 2 -%29 = OpConstant %3 264 -%32 = OpTypePointer Workgroup %4 +%20 = OpTypePointer Input %3 +%19 = OpVariable %20 Input +%22 = OpConstant %3 0 +%24 = OpTypeBool +%27 = OpConstant %3 2 +%28 = OpConstant %3 264 +%31 = OpTypePointer Workgroup %4 %14 = OpFunction %2 None %15 %10 = OpLabel %13 = OpLoad %7 %11 OpBranch %17 %17 = OpLabel -%20 = OpLoad %7 %19 -%24 = OpIEqual %22 %20 %21 -%25 = OpAll %23 %24 -OpSelectionMerge %26 None -OpBranchConditional %25 %27 %26 -%27 = OpLabel -OpStore %8 %18 -OpBranch %26 +%21 = OpLoad %3 %19 +%23 = OpIEqual %24 %21 %22 +OpSelectionMerge %25 None +OpBranchConditional %23 %26 %25 %26 = OpLabel -OpControlBarrier %28 %28 %29 -OpBranch %30 -%30 = OpLabel -%31 = OpCompositeExtract %3 %13 0 -OpControlBarrier %28 %28 %29 -%33 = OpAccessChain %32 %8 %31 -%34 = OpLoad %4 %33 -OpControlBarrier %28 %28 %29 -%35 = OpSGreaterThan %23 %34 %16 -OpSelectionMerge %36 None -OpBranchConditional %35 %37 %38 -%37 = OpLabel -OpControlBarrier %28 %28 %29 +OpStore %8 %18 +OpBranch %25 +%25 = OpLabel +OpControlBarrier %27 %27 %28 +OpBranch %29 +%29 = OpLabel +%30 = OpCompositeExtract %3 %13 0 +OpControlBarrier %27 %27 %28 +%32 = OpAccessChain %31 %8 %30 +%33 = OpLoad %4 %32 +OpControlBarrier %27 %27 %28 +%34 = OpSGreaterThan %24 %33 %16 +OpSelectionMerge %35 None +OpBranchConditional %34 %36 %37 +%36 = OpLabel +OpControlBarrier %27 %27 %28 OpReturn -%38 = OpLabel +%37 = OpLabel OpReturn -%36 = OpLabel +%35 = OpLabel OpReturn OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/wgsl-workgroup-var-init.spvasm b/naga/tests/out/spv/wgsl-workgroup-var-init.spvasm index cb214f3e35..801ad15e94 100644 --- a/naga/tests/out/spv/wgsl-workgroup-var-init.spvasm +++ b/naga/tests/out/spv/wgsl-workgroup-var-init.spvasm @@ -1,12 +1,12 @@ ; SPIR-V ; Version: 1.1 ; Generator: rspirv -; Bound: 42 +; Bound: 38 OpCapability Shader OpExtension "SPV_KHR_storage_buffer_storage_class" %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 -OpEntryPoint GLCompute %18 "main" %26 +OpEntryPoint GLCompute %18 "main" %25 OpExecutionMode %18 LocalSize 1 1 1 %3 = OpString "workgroup-var-init.wgsl" OpSource Unknown 0 %3 "struct WStruct { @@ -41,7 +41,7 @@ OpDecorate %14 DescriptorSet 0 OpDecorate %14 Binding 0 OpDecorate %15 Block OpMemberDecorate %15 0 Offset 0 -OpDecorate %26 BuiltIn LocalInvocationId +OpDecorate %25 BuiltIn LocalInvocationIndex %2 = OpTypeVoid %4 = OpTypeInt 32 0 %6 = OpConstant %4 512 @@ -60,36 +60,32 @@ OpDecorate %26 BuiltIn LocalInvocationId %20 = OpTypePointer StorageBuffer %5 %21 = OpConstant %4 0 %24 = OpConstantNull %11 -%25 = OpTypeVector %4 3 -%27 = OpTypePointer Input %25 -%26 = OpVariable %27 Input -%29 = OpConstantNull %25 -%31 = OpTypeBool -%30 = OpTypeVector %31 3 -%36 = OpConstant %4 2 -%37 = OpConstant %4 264 -%39 = OpTypePointer Workgroup %5 +%26 = OpTypePointer Input %4 +%25 = OpVariable %26 Input +%29 = OpTypeBool +%32 = OpConstant %4 2 +%33 = OpConstant %4 264 +%35 = OpTypePointer Workgroup %5 %18 = OpFunction %2 None %19 %17 = OpLabel %22 = OpAccessChain %20 %14 %21 OpBranch %23 %23 = OpLabel -%28 = OpLoad %25 %26 -%32 = OpIEqual %30 %28 %29 -%33 = OpAll %31 %32 -OpSelectionMerge %34 None -OpBranchConditional %33 %35 %34 -%35 = OpLabel +%27 = OpLoad %4 %25 +%28 = OpIEqual %29 %27 %21 +OpSelectionMerge %30 None +OpBranchConditional %28 %31 %30 +%31 = OpLabel OpStore %12 %24 +OpBranch %30 +%30 = OpLabel +OpControlBarrier %32 %32 %33 OpBranch %34 %34 = OpLabel -OpControlBarrier %36 %36 %37 -OpBranch %38 -%38 = OpLabel OpLine %3 14 14 -%40 = OpAccessChain %39 %12 %21 -%41 = OpLoad %5 %40 +%36 = OpAccessChain %35 %12 %21 +%37 = OpLoad %5 %36 OpLine %3 14 5 -OpStore %22 %41 +OpStore %22 %37 OpReturn OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/wgsl/wgsl-mesh-shader-empty.wgsl b/naga/tests/out/wgsl/wgsl-mesh-shader-empty.wgsl index c5e853af26..4920dbd865 100644 --- a/naga/tests/out/wgsl/wgsl-mesh-shader-empty.wgsl +++ b/naga/tests/out/wgsl/wgsl-mesh-shader-empty.wgsl @@ -22,12 +22,12 @@ struct MeshOutput { var taskPayload: TaskPayload; var mesh_output: MeshOutput; -@task @payload(taskPayload) @workgroup_size(1, 1, 1) +@task @payload(taskPayload) @workgroup_size(64, 1, 1) fn ts_main() -> @builtin(mesh_task_size) vec3 { return vec3(1u, 1u, 1u); } -@mesh(mesh_output) @workgroup_size(1, 1, 1) @payload(taskPayload) +@mesh(mesh_output) @workgroup_size(64, 1, 1) @payload(taskPayload) fn ms_main() { return; } diff --git a/naga/tests/out/wgsl/wgsl-mesh-shader-lines.wgsl b/naga/tests/out/wgsl/wgsl-mesh-shader-lines.wgsl index fe7c341f30..85fb355995 100644 --- a/naga/tests/out/wgsl/wgsl-mesh-shader-lines.wgsl +++ b/naga/tests/out/wgsl/wgsl-mesh-shader-lines.wgsl @@ -22,12 +22,12 @@ struct MeshOutput { var taskPayload: TaskPayload; var mesh_output: MeshOutput; -@task @payload(taskPayload) @workgroup_size(1, 1, 1) +@task @payload(taskPayload) @workgroup_size(64, 1, 1) fn ts_main() -> @builtin(mesh_task_size) vec3 { return vec3(1u, 1u, 1u); } -@mesh(mesh_output) @workgroup_size(1, 1, 1) @payload(taskPayload) +@mesh(mesh_output) @workgroup_size(64, 1, 1) @payload(taskPayload) fn ms_main() { return; } diff --git a/naga/tests/out/wgsl/wgsl-mesh-shader-points.wgsl b/naga/tests/out/wgsl/wgsl-mesh-shader-points.wgsl index b6eea73d08..0407e21c67 100644 --- a/naga/tests/out/wgsl/wgsl-mesh-shader-points.wgsl +++ b/naga/tests/out/wgsl/wgsl-mesh-shader-points.wgsl @@ -22,12 +22,12 @@ struct MeshOutput { var taskPayload: TaskPayload; var mesh_output: MeshOutput; -@task @payload(taskPayload) @workgroup_size(1, 1, 1) +@task @payload(taskPayload) @workgroup_size(64, 1, 1) fn ts_main() -> @builtin(mesh_task_size) vec3 { return vec3(1u, 1u, 1u); } -@mesh(mesh_output) @workgroup_size(1, 1, 1) @payload(taskPayload) +@mesh(mesh_output) @workgroup_size(64, 1, 1) @payload(taskPayload) fn ms_main() { return; } diff --git a/naga/tests/out/wgsl/wgsl-mesh-shader.wgsl b/naga/tests/out/wgsl/wgsl-mesh-shader.wgsl index 974027fdbb..8b8a52078c 100644 --- a/naga/tests/out/wgsl/wgsl-mesh-shader.wgsl +++ b/naga/tests/out/wgsl/wgsl-mesh-shader.wgsl @@ -31,65 +31,56 @@ var taskPayload: TaskPayload; var workgroupData: f32; var mesh_output: MeshOutput; -@task @payload(taskPayload) @workgroup_size(1, 1, 1) -fn ts_main() -> @builtin(mesh_task_size) vec3 { - workgroupData = 1f; - taskPayload.colorMask = vec4(1f, 1f, 0f, 1f); - taskPayload.visible = true; - return vec3(1u, 1u, 1u); +fn helper_reader() -> bool { + let _e2 = taskPayload.visible; + return _e2; } -@task @payload(taskPayload) @workgroup_size(2, 1, 1) -fn ts_divergent(@builtin(local_invocation_index) thread_id: u32) -> @builtin(mesh_task_size) vec3 { - if (thread_id == 0u) { +fn helper_writer(value: bool) { + taskPayload.visible = value; + return; +} + +@task @payload(taskPayload) @workgroup_size(64, 1, 1) +fn ts_main(@builtin(local_invocation_id) thread_id: vec3) -> @builtin(mesh_task_size) vec3 { + if (thread_id.x == 0u) { taskPayload.colorMask = vec4(1f, 1f, 0f, 1f); - taskPayload.visible = true; + helper_writer(true); + let _e14 = helper_reader(); + taskPayload.visible = _e14; return vec3(1u, 1u, 1u); } - return vec3(2u, 2u, 2u); -} - -@mesh(mesh_output) @workgroup_size(1, 1, 1) @payload(taskPayload) -fn ms_main() { - mesh_output.vertex_count = 3u; - mesh_output.primitive_count = 1u; - workgroupData = 2f; - mesh_output.vertices[0].position = vec4(0f, 1f, 0f, 1f); - let _e23 = taskPayload.colorMask; - mesh_output.vertices[0].color = (vec4(0f, 1f, 0f, 1f) * _e23); - mesh_output.vertices[1].position = vec4(-1f, -1f, 0f, 1f); - let _e45 = taskPayload.colorMask; - mesh_output.vertices[1].color = (vec4(0f, 0f, 1f, 1f) * _e45); - mesh_output.vertices[2].position = vec4(1f, -1f, 0f, 1f); - let _e67 = taskPayload.colorMask; - mesh_output.vertices[2].color = (vec4(1f, 0f, 0f, 1f) * _e67); - mesh_output.primitives[0].indices = vec3(0u, 1u, 2u); - let _e88 = taskPayload.visible; - mesh_output.primitives[0].cull = !(_e88); - mesh_output.primitives[0].colorMask = vec4(1f, 0f, 1f, 1f); - return; + return vec3(0u, 0u, 0u); } -@mesh(mesh_output) @workgroup_size(1, 1, 1) -fn ms_no_ts() { - mesh_output.vertex_count = 3u; - mesh_output.primitive_count = 1u; - workgroupData = 2f; - mesh_output.vertices[0].position = vec4(0f, 1f, 0f, 1f); - mesh_output.vertices[0].color = vec4(0f, 1f, 0f, 1f); - mesh_output.vertices[1].position = vec4(-1f, -1f, 0f, 1f); - mesh_output.vertices[1].color = vec4(0f, 0f, 1f, 1f); - mesh_output.vertices[2].position = vec4(1f, -1f, 0f, 1f); - mesh_output.vertices[2].color = vec4(1f, 0f, 0f, 1f); - mesh_output.primitives[0].indices = vec3(0u, 1u, 2u); - mesh_output.primitives[0].cull = false; - mesh_output.primitives[0].colorMask = vec4(1f, 0f, 1f, 1f); - return; +@mesh(mesh_output) @workgroup_size(64, 1, 1) @payload(taskPayload) +fn ms_main(@builtin(local_invocation_id) thread_id_1: vec3) { + if (thread_id_1.x == 0u) { + mesh_output.vertex_count = 3u; + mesh_output.primitive_count = 1u; + workgroupData = 2f; + mesh_output.vertices[0].position = vec4(0f, 1f, 0f, 1f); + let _e27 = taskPayload.colorMask; + mesh_output.vertices[0].color = (vec4(0f, 1f, 0f, 1f) * _e27); + mesh_output.vertices[1].position = vec4(-1f, -1f, 0f, 1f); + let _e49 = taskPayload.colorMask; + mesh_output.vertices[1].color = (vec4(0f, 0f, 1f, 1f) * _e49); + mesh_output.vertices[2].position = vec4(1f, -1f, 0f, 1f); + let _e71 = taskPayload.colorMask; + mesh_output.vertices[2].color = (vec4(1f, 0f, 0f, 1f) * _e71); + mesh_output.primitives[0].indices = vec3(0u, 1u, 2u); + let _e90 = helper_reader(); + mesh_output.primitives[0].cull = !(_e90); + mesh_output.primitives[0].colorMask = vec4(1f, 0f, 1f, 1f); + return; + } else { + return; + } } -@mesh(mesh_output) @workgroup_size(1, 1, 1) -fn ms_divergent(@builtin(local_invocation_index) thread_id_1: u32) { - if (thread_id_1 == 0u) { +@mesh(mesh_output) @workgroup_size(64, 1, 1) +fn ms_no_ts(@builtin(local_invocation_id) thread_id_2: vec3) { + if (thread_id_2.x == 0u) { mesh_output.vertex_count = 3u; mesh_output.primitive_count = 1u; workgroupData = 2f; diff --git a/tests/tests/wgpu-gpu/mesh_shader/shader.wgsl b/tests/tests/wgpu-gpu/mesh_shader/shader.wgsl index 13f6c225e4..7499bb5c5b 100644 --- a/tests/tests/wgpu-gpu/mesh_shader/shader.wgsl +++ b/tests/tests/wgpu-gpu/mesh_shader/shader.wgsl @@ -31,14 +31,25 @@ struct PrimitiveInput { var taskPayload: TaskPayload; var workgroupData: f32; +fn helper_reader() -> bool { + return taskPayload.visible; +} +fn helper_writer(value: bool) { + taskPayload.visible = value; +} + @task @payload(taskPayload) -@workgroup_size(1) -fn ts_main() -> @builtin(mesh_task_size) vec3 { - workgroupData = 1.0; - taskPayload.colorMask = vec4(1.0, 1.0, 0.0, 1.0); - taskPayload.visible = true; - return vec3(1, 1, 1); +@workgroup_size(64) +fn ts_main(@builtin(local_invocation_id) thread_id: vec3) -> @builtin(mesh_task_size) vec3 { + if thread_id.x == 0 { + taskPayload.colorMask = vec4(1.0, 1.0, 0.0, 1.0); + helper_writer(true); + taskPayload.visible = helper_reader(); + return vec3(1, 1, 1); + } + // Only the first thread's value is taken + return vec3(0, 0, 0); } struct MeshOutput { @@ -52,53 +63,34 @@ var mesh_output: MeshOutput; @mesh(mesh_output) @payload(taskPayload) -@workgroup_size(1) -fn ms_main() { - mesh_output.vertex_count = 3; - mesh_output.primitive_count = 1; - workgroupData = 2.0; +@workgroup_size(64) +fn ms_main(@builtin(local_invocation_id) thread_id: vec3) { + if thread_id.x == 0 { + mesh_output.vertex_count = 3; + mesh_output.primitive_count = 1; + workgroupData = 2.0; - mesh_output.vertices[0].position = positions[0]; - mesh_output.vertices[0].color = colors[0] * taskPayload.colorMask; + mesh_output.vertices[0].position = positions[0]; + mesh_output.vertices[0].color = colors[0] * taskPayload.colorMask; - mesh_output.vertices[1].position = positions[1]; - mesh_output.vertices[1].color = colors[1] * taskPayload.colorMask; + mesh_output.vertices[1].position = positions[1]; + mesh_output.vertices[1].color = colors[1] * taskPayload.colorMask; - mesh_output.vertices[2].position = positions[2]; - mesh_output.vertices[2].color = colors[2] * taskPayload.colorMask; + mesh_output.vertices[2].position = positions[2]; + mesh_output.vertices[2].color = colors[2] * taskPayload.colorMask; - mesh_output.primitives[0].indices = vec3(0, 1, 2); - mesh_output.primitives[0].cull = !taskPayload.visible; - mesh_output.primitives[0].colorMask = vec4(1.0, 0.0, 1.0, 1.0); + mesh_output.primitives[0].indices = vec3(0, 1, 2); + mesh_output.primitives[0].cull = !helper_reader(); + mesh_output.primitives[0].colorMask = vec4(1.0, 0.0, 1.0, 1.0); + return; + } } // Don't use task payload if no task shader is present @mesh(mesh_output) -@workgroup_size(1) -fn ms_no_ts() { - mesh_output.vertex_count = 3; - mesh_output.primitive_count = 1; - workgroupData = 2.0; - - mesh_output.vertices[0].position = positions[0]; - mesh_output.vertices[0].color = colors[0]; - - mesh_output.vertices[1].position = positions[1]; - mesh_output.vertices[1].color = colors[1]; - - mesh_output.vertices[2].position = positions[2]; - mesh_output.vertices[2].color = colors[2]; - - mesh_output.primitives[0].indices = vec3(0, 1, 2); - mesh_output.primitives[0].cull = false; - mesh_output.primitives[0].colorMask = vec4(1.0, 0.0, 1.0, 1.0); -} - -@mesh(mesh_output) -@workgroup_size(2) -fn ms_divergent(@builtin(local_invocation_index) index: u32) { - // Workgroup with 2 threads. They return at different points. - if index == 0 { +@workgroup_size(64) +fn ms_no_ts(@builtin(local_invocation_id) thread_id: vec3) { + if thread_id.x == 0 { mesh_output.vertex_count = 3; mesh_output.primitive_count = 1; workgroupData = 2.0; diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs index 1519e9d896..5895888e07 100644 --- a/wgpu-hal/src/vulkan/adapter.rs +++ b/wgpu-hal/src/vulkan/adapter.rs @@ -2111,6 +2111,7 @@ impl super::Adapter { drop_callback: Option, enabled_extensions: &[&'static CStr], features: wgt::Features, + limits: &wgt::Limits, memory_hints: &wgt::MemoryHints, family_index: u32, queue_index: u32, @@ -2366,6 +2367,10 @@ impl super::Adapter { // We need to build this separately for each invocation, so just default it out here binding_map: BTreeMap::default(), debug_info: None, + task_runtime_limits: Some(spv::TaskRuntimeLimits { + max_mesh_workgroups_per_dim: limits.max_task_mesh_workgroups_per_dimension, + max_mesh_workgroups_total: limits.max_task_mesh_workgroup_total_count, + }), } }; @@ -2478,6 +2483,7 @@ impl super::Adapter { pub unsafe fn open_with_callback<'a>( &self, features: wgt::Features, + limits: &wgt::Limits, memory_hints: &wgt::MemoryHints, callback: Option>>, ) -> Result, crate::DeviceError> { @@ -2540,6 +2546,7 @@ impl super::Adapter { None, &enabled_extensions, features, + limits, memory_hints, family_info.queue_family_index, 0, @@ -2554,10 +2561,10 @@ impl crate::Adapter for super::Adapter { unsafe fn open( &self, features: wgt::Features, - _limits: &wgt::Limits, + limits: &wgt::Limits, memory_hints: &wgt::MemoryHints, ) -> Result, crate::DeviceError> { - unsafe { self.open_with_callback(features, memory_hints, None) } + unsafe { self.open_with_callback(features, limits, memory_hints, None) } } unsafe fn texture_format_capabilities( diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs index f804a208fb..d489ceead6 100644 --- a/wgpu-hal/src/vulkan/device.rs +++ b/wgpu-hal/src/vulkan/device.rs @@ -653,7 +653,8 @@ impl super::Device { || !runtime_checks.ray_query_initialization_tracking || !binding_map.is_empty() || naga_shader.debug_source.is_some() - || !stage.zero_initialize_workgroup_memory; + || !stage.zero_initialize_workgroup_memory + || !runtime_checks.task_shader_dispatch_tracking; let mut temp_options; let options = if needs_temp_options { temp_options = self.naga_options.clone(); @@ -686,6 +687,9 @@ impl super::Device { temp_options.zero_initialize_workgroup_memory = naga::back::spv::ZeroInitializeWorkgroupMemoryMode::None; } + if !runtime_checks.task_shader_dispatch_tracking { + temp_options.task_runtime_limits = None; + } &temp_options } else { diff --git a/wgpu-types/src/features.rs b/wgpu-types/src/features.rs index c0eebc84d9..7d96ab47ca 100644 --- a/wgpu-types/src/features.rs +++ b/wgpu-types/src/features.rs @@ -1153,6 +1153,10 @@ bitflags_array! { /// /// Naga is only supported on vulkan. On other platforms you will have to use passthrough shaders. /// + /// It is recommended to use [`Device::create_shader_module_trusted`] with [`ShaderRuntimeChecks::unchecked()`] + /// to avoid workgroup memory zero initialization, which can be expensive due to zero initialization being + /// single-threaded currently. + /// /// Some Mesa drivers including LLVMPIPE but not RADV fail to run the naga generated code. /// [This may be our bug and will be investigated.](https://github.com/gfx-rs/wgpu/issues/8727) /// However, due to the nature of the failure, the fact that it is unique, and the random changes @@ -1160,6 +1164,9 @@ bitflags_array! { /// [this Mesa issue.](https://gitlab.freedesktop.org/mesa/mesa/-/issues/14376) /// /// This is a native only feature. + /// + /// [`Device::create_shader_module_trusted`]: https://docs.rs/wgpu/latest/wgpu/struct.Device.html#method.create_shader_module_trusted + /// [`ShaderRuntimeChecks::unchecked()`]: crate::ShaderRuntimeChecks::unchecked const EXPERIMENTAL_MESH_SHADER = 1 << 48; /// ***THIS IS EXPERIMENTAL:*** Features enabled by this may have diff --git a/wgpu-types/src/shader.rs b/wgpu-types/src/shader.rs index f34ec963b0..0bb76edfbc 100644 --- a/wgpu-types/src/shader.rs +++ b/wgpu-types/src/shader.rs @@ -43,6 +43,9 @@ pub struct ShaderRuntimeChecks { /// /// It is the aim that these cases will not cause UB if this is set to true, but currently this will still happen on DX12 and Metal. pub ray_query_initialization_tracking: bool, + + /// If false, task shaders will not validate that the mesh shader grid they dispatch is within legal limits. + pub task_shader_dispatch_tracking: bool, } impl ShaderRuntimeChecks { @@ -76,6 +79,7 @@ impl ShaderRuntimeChecks { bounds_checks: all_checks, force_loop_bounding: all_checks, ray_query_initialization_tracking: all_checks, + task_shader_dispatch_tracking: all_checks, } } }