diff --git a/naga/src/back/hlsl/mod.rs b/naga/src/back/hlsl/mod.rs index dcce866baca..1a80dcbb647 100644 --- a/naga/src/back/hlsl/mod.rs +++ b/naga/src/back/hlsl/mod.rs @@ -211,6 +211,9 @@ pub struct Options { pub zero_initialize_workgroup_memory: bool, /// Should we restrict indexing of vectors, matrices and arrays? pub restrict_indexing: bool, + /// If set, loops will have code injected into them, forcing the compiler + /// to think the number of iterations is bounded. + pub force_loop_bounding: bool, } impl Default for Options { @@ -223,6 +226,7 @@ impl Default for Options { push_constants_target: None, zero_initialize_workgroup_memory: true, restrict_indexing: true, + force_loop_bounding: true, } } } diff --git a/naga/src/back/hlsl/writer.rs b/naga/src/back/hlsl/writer.rs index b5df1357665..f74ee65c770 100644 --- a/naga/src/back/hlsl/writer.rs +++ b/naga/src/back/hlsl/writer.rs @@ -131,6 +131,32 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { self.need_bake_expressions.clear(); } + /// Generates statements to be inserted immediately before and inside the + /// body of each loop, to defeat infinite loop reasoning. The 0th item + /// of the returned tuple should be inserted immediately prior to the loop + /// and the 1st item should be inserted inside the loop body. + /// + /// See [`back::msl::Writer::gen_force_bounded_loop_statements`] for details. + fn gen_force_bounded_loop_statements( + &mut self, + level: back::Level, + ) -> Option<(String, String)> { + if !self.options.force_loop_bounding { + return None; + } + + let loop_bound_name = self.namer.call("loop_bound"); + let decl = format!("{level}uint2 {loop_bound_name} = uint2(0u, 0u);"); + let level = level.next(); + let max = u32::MAX; + let break_and_inc = format!( + "{level}if (all({loop_bound_name} == uint2({max}u, {max}u))) {{ break; }} +{level}{loop_bound_name} += uint2({loop_bound_name}.y == {max}u, 1u);" + ); + + Some((decl, break_and_inc)) + } + /// Helper method used to find which expressions of a given function require baking /// /// # Notes @@ -2048,6 +2074,10 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { ref continuing, break_if, } => { + let force_loop_bound_statements = self.gen_force_bounded_loop_statements(level); + if let Some((ref decl, _)) = force_loop_bound_statements { + writeln!(self.out, "{decl}")?; + } self.continue_ctx.enter_loop(); let l2 = level.next(); if !continuing.is_empty() || break_if.is_some() { @@ -2075,6 +2105,11 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { for sta in body.iter() { self.write_stmt(module, sta, func_ctx, l2)?; } + + if let Some((_, ref break_and_inc)) = force_loop_bound_statements { + writeln!(self.out, "{break_and_inc}")?; + } + writeln!(self.out, "{level}}}")?; self.continue_ctx.exit_loop(); } diff --git a/naga/src/back/msl/writer.rs b/naga/src/back/msl/writer.rs index 4589d39892a..9c0285e1179 100644 --- a/naga/src/back/msl/writer.rs +++ b/naga/src/back/msl/writer.rs @@ -383,11 +383,6 @@ pub struct Writer { /// Set of (struct type, struct field index) denoting which fields require /// padding inserted **before** them (i.e. between fields at index - 1 and index) struct_member_pads: FastHashSet<(Handle, u32)>, - - /// Name of the force-bounded-loop macro. - /// - /// See `emit_force_bounded_loop_macro` for details. - force_bounded_loop_macro_name: String, } impl crate::Scalar { @@ -601,7 +596,7 @@ struct ExpressionContext<'a> { /// accesses. These may need to be cached in temporary variables. See /// `index::find_checked_indexes` for details. guarded_indices: HandleSet, - /// See [`Writer::emit_force_bounded_loop_macro`] for details. + /// See [`Writer::gen_force_bounded_loop_statements`] for details. force_loop_bounding: bool, } @@ -685,7 +680,6 @@ impl Writer { #[cfg(test)] put_block_stack_pointers: Default::default(), struct_member_pads: FastHashSet::default(), - force_bounded_loop_macro_name: String::default(), } } @@ -696,17 +690,10 @@ impl Writer { self.out } - /// Define a macro to invoke at the bottom of each loop body, to - /// defeat MSL infinite loop reasoning. - /// - /// If we haven't done so already, emit the definition of a preprocessor - /// macro to be invoked at the end of each loop body in the generated MSL, - /// to ensure that the MSL compiler's optimizations do not remove bounds - /// checks. - /// - /// Only the first call to this function for a given module actually causes - /// the macro definition to be written. Subsequent loops can simply use the - /// prior macro definition, since macros aren't block-scoped. + /// Generates statements to be inserted immediately before and inside the + /// body of each loop, to defeat MSL infinite loop reasoning. The 0th item + /// of the returned tuple should be inserted immediately prior to the loop + /// and the 1st item should be inserted inside the loop body. /// /// # What is this trying to solve? /// @@ -774,7 +761,8 @@ impl Writer { /// but which in fact generates no instructions. Unfortunately, inline /// assembly is not handled correctly by some Metal device drivers. /// - /// Instead, we add the following code to the bottom of every loop: + /// A previously used approach was to add the following code to the bottom + /// of every loop: /// /// ```ignore /// if (volatile bool unpredictable = false; unpredictable) @@ -785,37 +773,47 @@ impl Writer { /// the `volatile` qualifier prevents the compiler from assuming this. Thus, /// it must assume that the `break` might be reached, and hence that the /// loop is not unbounded. This prevents the range analysis impact described - /// above. + /// above. Unfortunately this prevented the compiler from making important, + /// and safe, optimizations such as loop unrolling and was observed to + /// significantly hurt performance. + /// + /// Our current approach declares a counter before every loop and + /// increments it every iteration, breaking after 2^64 iterations: /// - /// Unfortunately, what makes this a kludge, not a hack, is that this - /// solution leaves the GPU executing a pointless conditional branch, at - /// runtime, in every iteration of the loop. There's no part of the system - /// that has a global enough view to be sure that `unpredictable` is true, - /// and remove it from the code. Adding the branch also affects - /// optimization: for example, it's impossible to unroll this loop. This - /// transformation has been observed to significantly hurt performance. + /// ```ignore + /// uint2 loop_bound = uint2(0); + /// while (true) { + /// if (metal::all(loop_bound == uint2(4294967295))) { break; } + /// loop_bound += uint2(loop_bound.y == 4294967295, 1); + /// } + /// ``` /// - /// To make our output a bit more legible, we pull the condition out into a - /// preprocessor macro defined at the top of the module. + /// This convinces the compiler that the loop is finite and therefore may + /// execute, whilst at the same time allowing optimizations such as loop + /// unrolling. Furthermore the 64-bit counter is large enough it seems + /// implausible that it would affect the execution of any shader. /// /// This approach is also used by Chromium WebGPU's Dawn shader compiler: - /// - fn emit_force_bounded_loop_macro(&mut self) -> BackendResult { - if !self.force_bounded_loop_macro_name.is_empty() { - return Ok(()); + /// + fn gen_force_bounded_loop_statements( + &mut self, + level: back::Level, + context: &StatementContext, + ) -> Option<(String, String)> { + if !context.expression.force_loop_bounding { + return None; } - self.force_bounded_loop_macro_name = self.namer.call("LOOP_IS_BOUNDED"); - let loop_bounded_volatile_name = self.namer.call("unpredictable_break_from_loop"); - writeln!( - self.out, - "#define {} {{ volatile bool {} = false; if ({}) break; }}", - self.force_bounded_loop_macro_name, - loop_bounded_volatile_name, - loop_bounded_volatile_name, - )?; + let loop_bound_name = self.namer.call("loop_bound"); + let decl = format!("{level}uint2 {loop_bound_name} = uint2(0u);"); + let level = level.next(); + let max = u32::MAX; + let break_and_inc = format!( + "{level}if ({NAMESPACE}::all({loop_bound_name} == uint2({max}u))) {{ break; }} +{level}{loop_bound_name} += uint2({loop_bound_name}.y == {max}u, 1u);" + ); - Ok(()) + Some((decl, break_and_inc)) } fn put_call_parameters( @@ -3083,6 +3081,11 @@ impl Writer { ref continuing, break_if, } => { + let force_loop_bound_statements = + self.gen_force_bounded_loop_statements(level, context); + if let Some((ref decl, _)) = force_loop_bound_statements { + writeln!(self.out, "{decl}")?; + } if !continuing.is_empty() || break_if.is_some() { let gate_name = self.namer.call("loop_init"); writeln!(self.out, "{level}bool {gate_name} = true;")?; @@ -3104,15 +3107,11 @@ impl Writer { writeln!(self.out, "{level}while(true) {{",)?; } self.put_block(level.next(), body, context)?; - if context.expression.force_loop_bounding { - self.emit_force_bounded_loop_macro()?; - writeln!( - self.out, - "{}{}", - level.next(), - self.force_bounded_loop_macro_name - )?; + + if let Some((_, ref break_and_inc)) = force_loop_bound_statements { + writeln!(self.out, "{break_and_inc}")?; } + writeln!(self.out, "{level}}}")?; } crate::Statement::Break => { @@ -3606,7 +3605,6 @@ impl Writer { &[CLAMPED_LOD_LOAD_PREFIX], &mut self.names, ); - self.force_bounded_loop_macro_name.clear(); self.struct_member_pads.clear(); writeln!( diff --git a/naga/tests/out/hlsl/boids.hlsl b/naga/tests/out/hlsl/boids.hlsl index 22e9c6cefd7..fd03af522fe 100644 --- a/naga/tests/out/hlsl/boids.hlsl +++ b/naga/tests/out/hlsl/boids.hlsl @@ -41,6 +41,7 @@ void main(uint3 global_invocation_id : SV_DispatchThreadID) vPos = _e8; float2 _e14 = asfloat(particlesSrc.Load2(8+index*16+0)); vVel = _e14; + uint2 loop_bound = uint2(0u, 0u); bool loop_init = true; while(true) { if (!loop_init) { @@ -91,6 +92,8 @@ void main(uint3 global_invocation_id : SV_DispatchThreadID) int _e88 = cVelCount; cVelCount = (_e88 + 1); } + if (all(loop_bound == uint2(4294967295u, 4294967295u))) { break; } + loop_bound += uint2(loop_bound.y == 4294967295u, 1u); } int _e94 = cMassCount; if ((_e94 > 0)) { diff --git a/naga/tests/out/hlsl/break-if.hlsl b/naga/tests/out/hlsl/break-if.hlsl index 63a01855833..c4c22e1f2c1 100644 --- a/naga/tests/out/hlsl/break-if.hlsl +++ b/naga/tests/out/hlsl/break-if.hlsl @@ -1,5 +1,6 @@ void breakIfEmpty() { + uint2 loop_bound = uint2(0u, 0u); bool loop_init = true; while(true) { if (!loop_init) { @@ -8,6 +9,8 @@ void breakIfEmpty() } } loop_init = false; + if (all(loop_bound == uint2(4294967295u, 4294967295u))) { break; } + loop_bound += uint2(loop_bound.y == 4294967295u, 1u); } return; } @@ -17,6 +20,7 @@ void breakIfEmptyBody(bool a) bool b = (bool)0; bool c = (bool)0; + uint2 loop_bound_1 = uint2(0u, 0u); bool loop_init_1 = true; while(true) { if (!loop_init_1) { @@ -29,6 +33,8 @@ void breakIfEmptyBody(bool a) } } loop_init_1 = false; + if (all(loop_bound_1 == uint2(4294967295u, 4294967295u))) { break; } + loop_bound_1 += uint2(loop_bound_1.y == 4294967295u, 1u); } return; } @@ -38,6 +44,7 @@ void breakIf(bool a_1) bool d = (bool)0; bool e = (bool)0; + uint2 loop_bound_2 = uint2(0u, 0u); bool loop_init_2 = true; while(true) { if (!loop_init_2) { @@ -50,6 +57,8 @@ void breakIf(bool a_1) d = a_1; bool _e2 = d; e = (a_1 != _e2); + if (all(loop_bound_2 == uint2(4294967295u, 4294967295u))) { break; } + loop_bound_2 += uint2(loop_bound_2.y == 4294967295u, 1u); } return; } @@ -58,6 +67,7 @@ void breakIfSeparateVariable() { uint counter = 0u; + uint2 loop_bound_3 = uint2(0u, 0u); bool loop_init_3 = true; while(true) { if (!loop_init_3) { @@ -69,6 +79,8 @@ void breakIfSeparateVariable() loop_init_3 = false; uint _e3 = counter; counter = (_e3 + 1u); + if (all(loop_bound_3 == uint2(4294967295u, 4294967295u))) { break; } + loop_bound_3 += uint2(loop_bound_3.y == 4294967295u, 1u); } return; } diff --git a/naga/tests/out/hlsl/collatz.hlsl b/naga/tests/out/hlsl/collatz.hlsl index b00586aa4c2..fb362e84999 100644 --- a/naga/tests/out/hlsl/collatz.hlsl +++ b/naga/tests/out/hlsl/collatz.hlsl @@ -6,6 +6,7 @@ uint collatz_iterations(uint n_base) uint i = 0u; n = n_base; + uint2 loop_bound = uint2(0u, 0u); while(true) { uint _e4 = n; if ((_e4 > 1u)) { @@ -24,6 +25,8 @@ uint collatz_iterations(uint n_base) uint _e20 = i; i = (_e20 + 1u); } + if (all(loop_bound == uint2(4294967295u, 4294967295u))) { break; } + loop_bound += uint2(loop_bound.y == 4294967295u, 1u); } uint _e23 = i; return _e23; diff --git a/naga/tests/out/hlsl/control-flow.hlsl b/naga/tests/out/hlsl/control-flow.hlsl index 2438858a8a1..cc229dbb3c2 100644 --- a/naga/tests/out/hlsl/control-flow.hlsl +++ b/naga/tests/out/hlsl/control-flow.hlsl @@ -20,6 +20,7 @@ void switch_case_break() void loop_switch_continue(int x) { + uint2 loop_bound = uint2(0u, 0u); while(true) { bool should_continue = false; switch(x) { @@ -34,12 +35,15 @@ void loop_switch_continue(int x) if (should_continue) { continue; } + if (all(loop_bound == uint2(4294967295u, 4294967295u))) { break; } + loop_bound += uint2(loop_bound.y == 4294967295u, 1u); } return; } void loop_switch_continue_nesting(int x_1, int y, int z) { + uint2 loop_bound_1 = uint2(0u, 0u); while(true) { bool should_continue_1 = false; switch(x_1) { @@ -54,6 +58,7 @@ void loop_switch_continue_nesting(int x_1, int y, int z) break; } default: { + uint2 loop_bound_2 = uint2(0u, 0u); while(true) { bool should_continue_2 = false; switch(z) { @@ -68,6 +73,8 @@ void loop_switch_continue_nesting(int x_1, int y, int z) if (should_continue_2) { continue; } + if (all(loop_bound_2 == uint2(4294967295u, 4294967295u))) { break; } + loop_bound_2 += uint2(loop_bound_2.y == 4294967295u, 1u); } break; } @@ -92,7 +99,10 @@ void loop_switch_continue_nesting(int x_1, int y, int z) if (should_continue_3) { continue; } + if (all(loop_bound_1 == uint2(4294967295u, 4294967295u))) { break; } + loop_bound_1 += uint2(loop_bound_1.y == 4294967295u, 1u); } + uint2 loop_bound_3 = uint2(0u, 0u); while(true) { bool should_continue_4 = false; do { @@ -107,6 +117,8 @@ void loop_switch_continue_nesting(int x_1, int y, int z) if (should_continue_4) { continue; } + if (all(loop_bound_3 == uint2(4294967295u, 4294967295u))) { break; } + loop_bound_3 += uint2(loop_bound_3.y == 4294967295u, 1u); } return; } @@ -115,6 +127,7 @@ void loop_switch_omit_continue_variable_checks(int x_2, int y_1, int z_1, int w) { int pos_1 = 0; + uint2 loop_bound_4 = uint2(0u, 0u); while(true) { bool should_continue_5 = false; switch(x_2) { @@ -126,7 +139,10 @@ void loop_switch_omit_continue_variable_checks(int x_2, int y_1, int z_1, int w) break; } } + if (all(loop_bound_4 == uint2(4294967295u, 4294967295u))) { break; } + loop_bound_4 += uint2(loop_bound_4.y == 4294967295u, 1u); } + uint2 loop_bound_5 = uint2(0u, 0u); while(true) { bool should_continue_6 = false; switch(x_2) { @@ -164,6 +180,8 @@ void loop_switch_omit_continue_variable_checks(int x_2, int y_1, int z_1, int w) if (should_continue_6) { continue; } + if (all(loop_bound_5 == uint2(4294967295u, 4294967295u))) { break; } + loop_bound_5 += uint2(loop_bound_5.y == 4294967295u, 1u); } return; } diff --git a/naga/tests/out/hlsl/do-while.hlsl b/naga/tests/out/hlsl/do-while.hlsl index ca7d42e1e76..766b1ce9cbb 100644 --- a/naga/tests/out/hlsl/do-while.hlsl +++ b/naga/tests/out/hlsl/do-while.hlsl @@ -1,5 +1,6 @@ void fb1_(inout bool cond) { + uint2 loop_bound = uint2(0u, 0u); bool loop_init = true; while(true) { if (!loop_init) { @@ -10,6 +11,8 @@ void fb1_(inout bool cond) } loop_init = false; continue; + if (all(loop_bound == uint2(4294967295u, 4294967295u))) { break; } + loop_bound += uint2(loop_bound.y == 4294967295u, 1u); } return; } diff --git a/naga/tests/out/hlsl/ray-query.hlsl b/naga/tests/out/hlsl/ray-query.hlsl index 9a0a2da1ce2..8f8684bcffc 100644 --- a/naga/tests/out/hlsl/ray-query.hlsl +++ b/naga/tests/out/hlsl/ray-query.hlsl @@ -84,6 +84,7 @@ RayIntersection query_loop(float3 pos, float3 dir, RaytracingAccelerationStructu RayQuery rq_1; rq_1.TraceRayInline(acs, ConstructRayDesc_(4u, 255u, 0.1, 100.0, pos, dir).flags, ConstructRayDesc_(4u, 255u, 0.1, 100.0, pos, dir).cull_mask, RayDescFromRayDesc_(ConstructRayDesc_(4u, 255u, 0.1, 100.0, pos, dir))); + uint2 loop_bound = uint2(0u, 0u); while(true) { const bool _e9 = rq_1.Proceed(); if (_e9) { @@ -92,6 +93,8 @@ RayIntersection query_loop(float3 pos, float3 dir, RaytracingAccelerationStructu } { } + if (all(loop_bound == uint2(4294967295u, 4294967295u))) { break; } + loop_bound += uint2(loop_bound.y == 4294967295u, 1u); } const RayIntersection rayintersection = GetCommittedIntersection(rq_1); return rayintersection; diff --git a/naga/tests/out/hlsl/shadow.hlsl b/naga/tests/out/hlsl/shadow.hlsl index c0431bfef9e..346215e7c34 100644 --- a/naga/tests/out/hlsl/shadow.hlsl +++ b/naga/tests/out/hlsl/shadow.hlsl @@ -92,6 +92,7 @@ float4 fs_main(FragmentInput_fs_main fragmentinput_fs_main) : SV_Target0 uint i = 0u; float3 normal_1 = normalize(in_.world_normal); + uint2 loop_bound = uint2(0u, 0u); bool loop_init = true; while(true) { if (!loop_init) { @@ -115,6 +116,8 @@ float4 fs_main(FragmentInput_fs_main fragmentinput_fs_main) : SV_Target0 float3 _e37 = color; color = (_e37 + ((_e23 * diffuse) * light.color.xyz)); } + if (all(loop_bound == uint2(4294967295u, 4294967295u))) { break; } + loop_bound += uint2(loop_bound.y == 4294967295u, 1u); } float3 _e42 = color; float4 _e47 = u_entity.color; @@ -128,6 +131,7 @@ float4 fs_main_without_storage(FragmentInput_fs_main_without_storage fragmentinp uint i_1 = 0u; float3 normal_2 = normalize(in_1.world_normal); + uint2 loop_bound_1 = uint2(0u, 0u); bool loop_init_1 = true; while(true) { if (!loop_init_1) { @@ -151,6 +155,8 @@ float4 fs_main_without_storage(FragmentInput_fs_main_without_storage fragmentinp float3 _e37 = color_1; color_1 = (_e37 + ((_e23 * diffuse_1) * light_1.color.xyz)); } + if (all(loop_bound_1 == uint2(4294967295u, 4294967295u))) { break; } + loop_bound_1 += uint2(loop_bound_1.y == 4294967295u, 1u); } float3 _e42 = color_1; float4 _e47 = u_entity.color; diff --git a/naga/tests/out/msl/atomicCompareExchange.msl b/naga/tests/out/msl/atomicCompareExchange.msl index 6655fad7e2a..90972e872b5 100644 --- a/naga/tests/out/msl/atomicCompareExchange.msl +++ b/naga/tests/out/msl/atomicCompareExchange.msl @@ -76,6 +76,7 @@ kernel void test_atomic_compare_exchange_i32_( uint i = 0u; int old = {}; bool exchanged = {}; + uint2 loop_bound = uint2(0u); bool loop_init = true; while(true) { if (!loop_init) { @@ -93,6 +94,7 @@ kernel void test_atomic_compare_exchange_i32_( int _e8 = metal::atomic_load_explicit(&arr_i32_.inner[_e6], metal::memory_order_relaxed); old = _e8; exchanged = false; + uint2 loop_bound_1 = uint2(0u); while(true) { bool _e12 = exchanged; if (!(_e12)) { @@ -108,11 +110,12 @@ kernel void test_atomic_compare_exchange_i32_( old = _e23.old_value; exchanged = _e23.exchanged; } -#define LOOP_IS_BOUNDED { volatile bool unpredictable_break_from_loop = false; if (unpredictable_break_from_loop) break; } - LOOP_IS_BOUNDED + if (metal::all(loop_bound_1 == uint2(4294967295u))) { break; } + loop_bound_1 += uint2(loop_bound_1.y == 4294967295u, 1u); } } - LOOP_IS_BOUNDED + if (metal::all(loop_bound == uint2(4294967295u))) { break; } + loop_bound += uint2(loop_bound.y == 4294967295u, 1u); } return; } @@ -124,6 +127,7 @@ kernel void test_atomic_compare_exchange_u32_( uint i_1 = 0u; uint old_1 = {}; bool exchanged_1 = {}; + uint2 loop_bound_2 = uint2(0u); bool loop_init_1 = true; while(true) { if (!loop_init_1) { @@ -141,6 +145,7 @@ kernel void test_atomic_compare_exchange_u32_( uint _e8 = metal::atomic_load_explicit(&arr_u32_.inner[_e6], metal::memory_order_relaxed); old_1 = _e8; exchanged_1 = false; + uint2 loop_bound_3 = uint2(0u); while(true) { bool _e12 = exchanged_1; if (!(_e12)) { @@ -156,10 +161,12 @@ kernel void test_atomic_compare_exchange_u32_( old_1 = _e23.old_value; exchanged_1 = _e23.exchanged; } - LOOP_IS_BOUNDED + if (metal::all(loop_bound_3 == uint2(4294967295u))) { break; } + loop_bound_3 += uint2(loop_bound_3.y == 4294967295u, 1u); } } - LOOP_IS_BOUNDED + if (metal::all(loop_bound_2 == uint2(4294967295u))) { break; } + loop_bound_2 += uint2(loop_bound_2.y == 4294967295u, 1u); } return; } diff --git a/naga/tests/out/msl/boids.msl b/naga/tests/out/msl/boids.msl index 07acd7cf623..0dc2e2fd674 100644 --- a/naga/tests/out/msl/boids.msl +++ b/naga/tests/out/msl/boids.msl @@ -55,6 +55,7 @@ kernel void main_( vPos = _e8; metal::float2 _e14 = particlesSrc.particles[index].vel; vVel = _e14; + uint2 loop_bound = uint2(0u); bool loop_init = true; while(true) { if (!loop_init) { @@ -105,8 +106,8 @@ kernel void main_( int _e88 = cVelCount; cVelCount = _e88 + 1; } -#define LOOP_IS_BOUNDED { volatile bool unpredictable_break_from_loop = false; if (unpredictable_break_from_loop) break; } - LOOP_IS_BOUNDED + if (metal::all(loop_bound == uint2(4294967295u))) { break; } + loop_bound += uint2(loop_bound.y == 4294967295u, 1u); } int _e94 = cMassCount; if (_e94 > 0) { diff --git a/naga/tests/out/msl/break-if.msl b/naga/tests/out/msl/break-if.msl index 4d3397234b6..5c96e837b39 100644 --- a/naga/tests/out/msl/break-if.msl +++ b/naga/tests/out/msl/break-if.msl @@ -7,6 +7,7 @@ using metal::uint; void breakIfEmpty( ) { + uint2 loop_bound = uint2(0u); bool loop_init = true; while(true) { if (!loop_init) { @@ -15,8 +16,8 @@ void breakIfEmpty( } } loop_init = false; -#define LOOP_IS_BOUNDED { volatile bool unpredictable_break_from_loop = false; if (unpredictable_break_from_loop) break; } - LOOP_IS_BOUNDED + if (metal::all(loop_bound == uint2(4294967295u))) { break; } + loop_bound += uint2(loop_bound.y == 4294967295u, 1u); } return; } @@ -26,6 +27,7 @@ void breakIfEmptyBody( ) { bool b = {}; bool c = {}; + uint2 loop_bound_1 = uint2(0u); bool loop_init_1 = true; while(true) { if (!loop_init_1) { @@ -38,7 +40,8 @@ void breakIfEmptyBody( } } loop_init_1 = false; - LOOP_IS_BOUNDED + if (metal::all(loop_bound_1 == uint2(4294967295u))) { break; } + loop_bound_1 += uint2(loop_bound_1.y == 4294967295u, 1u); } return; } @@ -48,6 +51,7 @@ void breakIf( ) { bool d = {}; bool e = {}; + uint2 loop_bound_2 = uint2(0u); bool loop_init_2 = true; while(true) { if (!loop_init_2) { @@ -60,7 +64,8 @@ void breakIf( d = a_1; bool _e2 = d; e = a_1 != _e2; - LOOP_IS_BOUNDED + if (metal::all(loop_bound_2 == uint2(4294967295u))) { break; } + loop_bound_2 += uint2(loop_bound_2.y == 4294967295u, 1u); } return; } @@ -68,6 +73,7 @@ void breakIf( void breakIfSeparateVariable( ) { uint counter = 0u; + uint2 loop_bound_3 = uint2(0u); bool loop_init_3 = true; while(true) { if (!loop_init_3) { @@ -79,7 +85,8 @@ void breakIfSeparateVariable( loop_init_3 = false; uint _e3 = counter; counter = _e3 + 1u; - LOOP_IS_BOUNDED + if (metal::all(loop_bound_3 == uint2(4294967295u))) { break; } + loop_bound_3 += uint2(loop_bound_3.y == 4294967295u, 1u); } return; } diff --git a/naga/tests/out/msl/collatz.msl b/naga/tests/out/msl/collatz.msl index e282d13abb1..81aca4a5222 100644 --- a/naga/tests/out/msl/collatz.msl +++ b/naga/tests/out/msl/collatz.msl @@ -19,6 +19,7 @@ uint collatz_iterations( uint n = {}; uint i = 0u; n = n_base; + uint2 loop_bound = uint2(0u); while(true) { uint _e4 = n; if (_e4 > 1u) { @@ -37,8 +38,8 @@ uint collatz_iterations( uint _e20 = i; i = _e20 + 1u; } -#define LOOP_IS_BOUNDED { volatile bool unpredictable_break_from_loop = false; if (unpredictable_break_from_loop) break; } - LOOP_IS_BOUNDED + if (metal::all(loop_bound == uint2(4294967295u))) { break; } + loop_bound += uint2(loop_bound.y == 4294967295u, 1u); } uint _e23 = i; return _e23; diff --git a/naga/tests/out/msl/control-flow.msl b/naga/tests/out/msl/control-flow.msl index 1b35249f362..7dcf30fb461 100644 --- a/naga/tests/out/msl/control-flow.msl +++ b/naga/tests/out/msl/control-flow.msl @@ -31,6 +31,7 @@ void switch_case_break( void loop_switch_continue( int x ) { + uint2 loop_bound = uint2(0u); while(true) { switch(x) { case 1: { @@ -40,8 +41,8 @@ void loop_switch_continue( break; } } -#define LOOP_IS_BOUNDED { volatile bool unpredictable_break_from_loop = false; if (unpredictable_break_from_loop) break; } - LOOP_IS_BOUNDED + if (metal::all(loop_bound == uint2(4294967295u))) { break; } + loop_bound += uint2(loop_bound.y == 4294967295u, 1u); } return; } @@ -51,6 +52,7 @@ void loop_switch_continue_nesting( int y, int z ) { + uint2 loop_bound_1 = uint2(0u); while(true) { switch(x_1) { case 1: { @@ -62,6 +64,7 @@ void loop_switch_continue_nesting( continue; } default: { + uint2 loop_bound_2 = uint2(0u); while(true) { switch(z) { case 1: { @@ -71,7 +74,8 @@ void loop_switch_continue_nesting( break; } } - LOOP_IS_BOUNDED + if (metal::all(loop_bound_2 == uint2(4294967295u))) { break; } + loop_bound_2 += uint2(loop_bound_2.y == 4294967295u, 1u); } break; } @@ -87,8 +91,10 @@ void loop_switch_continue_nesting( continue; } } - LOOP_IS_BOUNDED + if (metal::all(loop_bound_1 == uint2(4294967295u))) { break; } + loop_bound_1 += uint2(loop_bound_1.y == 4294967295u, 1u); } + uint2 loop_bound_3 = uint2(0u); while(true) { switch(y) { case 1: @@ -101,7 +107,8 @@ void loop_switch_continue_nesting( break; } } - LOOP_IS_BOUNDED + if (metal::all(loop_bound_3 == uint2(4294967295u))) { break; } + loop_bound_3 += uint2(loop_bound_3.y == 4294967295u, 1u); } return; } @@ -113,6 +120,7 @@ void loop_switch_omit_continue_variable_checks( int w ) { int pos_1 = 0; + uint2 loop_bound_4 = uint2(0u); while(true) { switch(x_2) { case 1: { @@ -123,8 +131,10 @@ void loop_switch_omit_continue_variable_checks( break; } } - LOOP_IS_BOUNDED + if (metal::all(loop_bound_4 == uint2(4294967295u))) { break; } + loop_bound_4 += uint2(loop_bound_4.y == 4294967295u, 1u); } + uint2 loop_bound_5 = uint2(0u); while(true) { switch(x_2) { case 1: { @@ -154,7 +164,8 @@ void loop_switch_omit_continue_variable_checks( break; } } - LOOP_IS_BOUNDED + if (metal::all(loop_bound_5 == uint2(4294967295u))) { break; } + loop_bound_5 += uint2(loop_bound_5.y == 4294967295u, 1u); } return; } diff --git a/naga/tests/out/msl/do-while.msl b/naga/tests/out/msl/do-while.msl index 2a883304d13..4b437a78599 100644 --- a/naga/tests/out/msl/do-while.msl +++ b/naga/tests/out/msl/do-while.msl @@ -8,6 +8,7 @@ using metal::uint; void fb1_( thread bool& cond ) { + uint2 loop_bound = uint2(0u); bool loop_init = true; while(true) { if (!loop_init) { @@ -18,8 +19,8 @@ void fb1_( } loop_init = false; continue; -#define LOOP_IS_BOUNDED { volatile bool unpredictable_break_from_loop = false; if (unpredictable_break_from_loop) break; } - LOOP_IS_BOUNDED + if (metal::all(loop_bound == uint2(4294967295u))) { break; } + loop_bound += uint2(loop_bound.y == 4294967295u, 1u); } return; } diff --git a/naga/tests/out/msl/overrides-ray-query.msl b/naga/tests/out/msl/overrides-ray-query.msl index 3aa0ee03598..0b6aebfc851 100644 --- a/naga/tests/out/msl/overrides-ray-query.msl +++ b/naga/tests/out/msl/overrides-ray-query.msl @@ -33,6 +33,7 @@ kernel void main_( rq.intersector.force_opacity((desc.flags & 1) != 0 ? metal::raytracing::forced_opacity::opaque : (desc.flags & 2) != 0 ? metal::raytracing::forced_opacity::non_opaque : metal::raytracing::forced_opacity::none); rq.intersector.accept_any_intersection((desc.flags & 4) != 0); rq.intersection = rq.intersector.intersect(metal::raytracing::ray(desc.origin, desc.dir, desc.tmin, desc.tmax), acc_struct, desc.cull_mask); rq.ready = true; + uint2 loop_bound = uint2(0u); while(true) { bool _e31 = rq.ready; rq.ready = false; @@ -40,8 +41,8 @@ kernel void main_( } else { break; } -#define LOOP_IS_BOUNDED { volatile bool unpredictable_break_from_loop = false; if (unpredictable_break_from_loop) break; } - LOOP_IS_BOUNDED + if (metal::all(loop_bound == uint2(4294967295u))) { break; } + loop_bound += uint2(loop_bound.y == 4294967295u, 1u); } return; } diff --git a/naga/tests/out/msl/ray-query.msl b/naga/tests/out/msl/ray-query.msl index b8230fb2e81..a5b8b5f556f 100644 --- a/naga/tests/out/msl/ray-query.msl +++ b/naga/tests/out/msl/ray-query.msl @@ -53,6 +53,7 @@ RayIntersection query_loop( rq_1.intersector.force_opacity((_e8.flags & 1) != 0 ? metal::raytracing::forced_opacity::opaque : (_e8.flags & 2) != 0 ? metal::raytracing::forced_opacity::non_opaque : metal::raytracing::forced_opacity::none); rq_1.intersector.accept_any_intersection((_e8.flags & 4) != 0); rq_1.intersection = rq_1.intersector.intersect(metal::raytracing::ray(_e8.origin, _e8.dir, _e8.tmin, _e8.tmax), acs, _e8.cull_mask); rq_1.ready = true; + uint2 loop_bound = uint2(0u); while(true) { bool _e9 = rq_1.ready; rq_1.ready = false; @@ -60,8 +61,8 @@ RayIntersection query_loop( } else { break; } -#define LOOP_IS_BOUNDED { volatile bool unpredictable_break_from_loop = false; if (unpredictable_break_from_loop) break; } - LOOP_IS_BOUNDED + if (metal::all(loop_bound == uint2(4294967295u))) { break; } + loop_bound += uint2(loop_bound.y == 4294967295u, 1u); } return RayIntersection {_map_intersection_type(rq_1.intersection.type), rq_1.intersection.distance, rq_1.intersection.user_instance_id, rq_1.intersection.instance_id, {}, rq_1.intersection.geometry_id, rq_1.intersection.primitive_id, rq_1.intersection.triangle_barycentric_coord, rq_1.intersection.triangle_front_facing, {}, rq_1.intersection.object_to_world_transform, rq_1.intersection.world_to_object_transform}; } diff --git a/naga/tests/out/msl/shadow.msl b/naga/tests/out/msl/shadow.msl index 18cc8421106..d8d35eab5b0 100644 --- a/naga/tests/out/msl/shadow.msl +++ b/naga/tests/out/msl/shadow.msl @@ -100,6 +100,7 @@ fragment fs_mainOutput fs_main( metal::float3 color = c_ambient; uint i = 0u; metal::float3 normal_1 = metal::normalize(in.world_normal); + uint2 loop_bound = uint2(0u); bool loop_init = true; while(true) { if (!loop_init) { @@ -123,8 +124,8 @@ fragment fs_mainOutput fs_main( metal::float3 _e37 = color; color = _e37 + ((_e23 * diffuse) * light.color.xyz); } -#define LOOP_IS_BOUNDED { volatile bool unpredictable_break_from_loop = false; if (unpredictable_break_from_loop) break; } - LOOP_IS_BOUNDED + if (metal::all(loop_bound == uint2(4294967295u))) { break; } + loop_bound += uint2(loop_bound.y == 4294967295u, 1u); } metal::float3 _e42 = color; metal::float4 _e47 = u_entity.color; @@ -152,6 +153,7 @@ fragment fs_main_without_storageOutput fs_main_without_storage( metal::float3 color_1 = c_ambient; uint i_1 = 0u; metal::float3 normal_2 = metal::normalize(in_1.world_normal); + uint2 loop_bound_1 = uint2(0u); bool loop_init_1 = true; while(true) { if (!loop_init_1) { @@ -175,7 +177,8 @@ fragment fs_main_without_storageOutput fs_main_without_storage( metal::float3 _e37 = color_1; color_1 = _e37 + ((_e23 * diffuse_1) * light_1.color.xyz); } - LOOP_IS_BOUNDED + if (metal::all(loop_bound_1 == uint2(4294967295u))) { break; } + loop_bound_1 += uint2(loop_bound_1.y == 4294967295u, 1u); } metal::float3 _e42 = color_1; metal::float4 _e47 = u_entity.color; diff --git a/tests/tests/subgroup_operations/mod.rs b/tests/tests/subgroup_operations/mod.rs index 25fddf120db..afffdb0b979 100644 --- a/tests/tests/subgroup_operations/mod.rs +++ b/tests/tests/subgroup_operations/mod.rs @@ -17,15 +17,21 @@ static SUBGROUP_OPERATIONS: GpuTestConfiguration = GpuTestConfiguration::new() // are not matched against. .expect_fail( wgpu_test::FailureCase::molten_vk() - // 14.3 doesn't fail test 29 + // 15.0 doesn't fail test 17 .panic("thread 0 failed tests: 27,\nthread 1 failed tests: 27, 28,\n") + // 14.3 doesn't fail test 29 + .panic("thread 0 failed tests: 27,\nthread 1 failed tests: 17, 27, 28,\n") // Prior versions do. - .panic("thread 0 failed tests: 27, 29,\nthread 1 failed tests: 27, 28, 29,\n"), + .panic( + "thread 0 failed tests: 27, 29,\nthread 1 failed tests: 17, 27, 28, 29,\n", + ), ) .expect_fail( wgpu_test::FailureCase::backend(wgpu::Backends::METAL) - // 14.3 doesn't fail test 29 + // 15.0 doesn't fail test 17 .panic("thread 0 failed tests: 27,\nthread 1 failed tests: 27, 28,\n") + // 14.3 doesn't fail test 29 + .panic("thread 0 failed tests: 27,\nthread 1 failed tests: 17, 27, 28,\n") // Prior versions do. .panic("thread 0 failed tests: 27, 29,\nthread 1 failed tests: 27, 28, 29,\n"), ), diff --git a/tests/tests/subgroup_operations/shader.wgsl b/tests/tests/subgroup_operations/shader.wgsl index 77cb81ce750..ff25f240a68 100644 --- a/tests/tests/subgroup_operations/shader.wgsl +++ b/tests/tests/subgroup_operations/shader.wgsl @@ -90,6 +90,7 @@ fn main( } add_result_to_mask(&passed, 16u, subgroupExclusiveMul(global_id.x + 1u) == expected); + // Mac/Apple will sometimes fail this test. MacOS 15.0 passes it, so the bug in the metal compiler seems to be fixed. expected = 0u; for(var i = 0u; i <= subgroup_invocation_id; i += 1u) { expected += global_id.x - subgroup_invocation_id + i + 1u; diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index b9a825845aa..5c99b038e70 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -280,12 +280,15 @@ impl super::Device { let needs_temp_options = stage.zero_initialize_workgroup_memory != layout.naga_options.zero_initialize_workgroup_memory - || stage.module.runtime_checks.bounds_checks != layout.naga_options.restrict_indexing; + || stage.module.runtime_checks.bounds_checks != layout.naga_options.restrict_indexing + || stage.module.runtime_checks.force_loop_bounding + != layout.naga_options.force_loop_bounding; let mut temp_options; let naga_options = if needs_temp_options { temp_options = layout.naga_options.clone(); temp_options.zero_initialize_workgroup_memory = stage.zero_initialize_workgroup_memory; temp_options.restrict_indexing = stage.module.runtime_checks.bounds_checks; + temp_options.force_loop_bounding = stage.module.runtime_checks.force_loop_bounding; &temp_options } else { &layout.naga_options @@ -1241,6 +1244,7 @@ impl crate::Device for super::Device { push_constants_target, zero_initialize_workgroup_memory: true, restrict_indexing: true, + force_loop_bounding: true, }, }) }