@@ -864,13 +864,14 @@ broadcast_unalias(::Nothing, src) = src
864864
865865# Preprocessing a `Broadcasted` does two things:
866866# * unaliases any arguments from `dest`
867- # * "extrudes" the arguments where it is advantageous to pre-compute the broadcasted indices
868- @inline preprocess (dest, bc:: Broadcasted{Style} ) where {Style} = Broadcasted {Style} (bc. f, preprocess_args (dest, bc. args), bc. axes)
869- preprocess (dest, x) = extrude (broadcast_unalias (dest, x))
867+ # * calls `f` on the arguments (typically `extrude`, which pre-computes the broadcasted indices where advantageous)
# Two-argument entry point: preprocess `bc` for `dest`, using `extrude` as the
# function applied to each leaf argument.
@inline preprocess(dest, bc) = preprocess(extrude, dest, bc)

# Rebuild a `Broadcasted` with the same function, style parameter and axes, but with
# its arguments replaced by the result of `preprocess_args(f, dest, bc.args)`.
@inline function preprocess(f, dest, bc::Broadcasted{Style}) where {Style}
    return Broadcasted{Style}(bc.f, preprocess_args(f, dest, bc.args), bc.axes)
end

# Leaf argument: unalias `x` from `dest`, then hand the result to `f`.
preprocess(f, dest, x) = f(broadcast_unalias(dest, x))
870871
871- @inline preprocess_args (dest, args:: Tuple ) = (preprocess (dest, args[1 ]), preprocess_args (dest, tail (args))... )
872- preprocess_args (dest, args:: Tuple{Any} ) = (preprocess (dest, args[1 ]),)
873- preprocess_args (dest, args:: Tuple{} ) = ()
# Apply `preprocess(f, dest, ·)` to every element of the argument tuple, peeling one
# element per call so the result is again a tuple of the same length.
@inline function preprocess_args(f, dest, args::Tuple)
    return (preprocess(f, dest, args[1]), preprocess_args(f, dest, tail(args))...)
end
# One remaining argument: no further recursion needed.
preprocess_args(f, dest, args::Tuple{Any}) = (preprocess(f, dest, args[1]),)
# No arguments: nothing to preprocess.
preprocess_args(f, dest, args::Tuple{}) = ()
874875
875876# Specialize this method if all you want to do is specialize on typeof(dest)
876877@inline function copyto! (dest:: AbstractArray , bc:: Broadcasted{Nothing} )
@@ -882,13 +883,45 @@ preprocess_args(dest, args::Tuple{}) = ()
882883 return copyto! (dest, A)
883884 end
884885 end
885- bc′ = preprocess (dest, bc)
886- @simd for I in eachindex (bc′)
887- @inbounds dest[I] = bc′[I]
886+ # Ugly performance hack around issue #28126: determine if all arguments to the
887+ # broadcast are sized such that the broadcasting core can statically determine
888+ # whether a given dimension is "extruded" or not. If so, we don't need to check
889+ # any array sizes within the inner loop. Ideally this really should be something
890+ # that Julia and/or LLVM could figure out and eliminate... and indeed they can
891+ # for limited numbers of arguments.
892+ if _is_static_broadcast_28126 (dest, bc)
893+ bcs′ = preprocess (nonextrude_28126, dest, bc)
894+ @simd for I in eachindex (bcs′)
895+ @inbounds dest[I] = bcs′[I]
896+ end
897+ else
898+ bc′ = preprocess (extrude, dest, bc)
899+ @simd for I in eachindex (bc′)
900+ @inbounds dest[I] = bc′[I]
901+ end
888902 end
889903 return dest
890904end
891905
# Part of the performance hack around issue #28126: report whether every leaf argument
# of a broadcast may be wrapped by the "non-extruded" fast path for `dest`.
#
# A nested `Broadcasted` qualifies when all of its arguments do.
@inline function _is_static_broadcast_28126(dest, bc::Broadcasted{Style}) where {Style}
    return _is_static_broadcast_28126_args(dest, bc.args)
end

# Anything not recognised below is conservatively reported as non-static.
_is_static_broadcast_28126(dest, x) = false

# Scalar-like arguments (and tuples) are reported as static regardless of `dest`.
function _is_static_broadcast_28126(
    dest, x::Union{Ref,Tuple,Type,Number,AbstractArray{<:Any,0}}
)
    return true
end
# Same answer for zero-dimensional arrays when `dest` is an array; this method exists
# so the call is not ambiguous between the method above and the general array one.
_is_static_broadcast_28126(dest::AbstractArray, x::AbstractArray{<:Any,0}) = true

# Array arguments are static only when their axes match those of `dest` in *every*
# dimension. The fast path indexes the wrapped array directly with the destination
# index (`b.x[i]`) under `@inbounds`, so an argument with fewer dimensions than `dest`
# (e.g. a vector broadcast across the columns of a matrix) must not be accepted:
# comparing only the first axis would lead to out-of-bounds reads.
_is_static_broadcast_28126(dest::AbstractArray, x::AbstractArray) = axes(dest) == axes(x)

# `true` iff every element of the argument tuple is static with respect to `dest`.
@inline function _is_static_broadcast_28126_args(dest, args::Tuple)
    return _is_static_broadcast_28126(dest, args[1]) &&
           _is_static_broadcast_28126_args(dest, tail(args))
end
function _is_static_broadcast_28126_args(dest, args::Tuple{Any})
    return _is_static_broadcast_28126(dest, args[1])
end
_is_static_broadcast_28126_args(dest, args::Tuple{}) = true
916+
# Wrapper for an array argument that is indexed directly with the broadcast index,
# without any per-dimension index adjustment (part of the hack around issue #28126).
struct _NonExtruded28126{T}
    x::T
end
@inline axes(b::_NonExtruded28126) = axes(b.x)

# Forward the broadcast index straight to the wrapped array.
Base.@propagate_inbounds _broadcast_getindex(b::_NonExtruded28126, i) = b.x[i]
# A zero-dimensional array holds a single element and cannot be indexed with the
# destination's index, so ignore the index and read that element.
Base.@propagate_inbounds function _broadcast_getindex(
    b::_NonExtruded28126{<:AbstractArray{<:Any,0}}, i
)
    return b.x[]
end

# Wrap arrays; leave every other kind of argument untouched.
_nonextrude_28126(x::AbstractArray) = _NonExtruded28126(x)
_nonextrude_28126(x) = x

# The generic `copyto!` passes this transform to `preprocess` under the name
# `nonextrude_28126` (no leading underscore); provide that name so the call resolves.
const nonextrude_28126 = _nonextrude_28126
924+
892925# Performance optimization: for BitArray outputs, we cache the result
893926# in a "small" Vector{Bool}, and then copy in chunks into the output
894927@inline function copyto! (dest:: BitArray , bc:: Broadcasted{Nothing} )
0 commit comments