JuliaHealth · cncastillo · Jul 19, 2024 · Jul 11, 2024 · Jul 11, 2024 · Jul 11, 2024
diff --git a/.github/workflows/Benchmark.yml b/.github/workflows/Benchmark.yml
@@ -32,7 +32,7 @@ on:
 
 jobs:
   benchmark:
-    if: "!contains(github.event.head_commit.message, '[skip benchmarks]')"
+    if: ${{ !contains(github.event.head_commit.message, '[skip benchmarks]') && github.event.pull_request.head.repo.full_name == github.event.pull_request.base.repo.full_name }}
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4

diff --git a/KomaMRICore/Project.toml b/KomaMRICore/Project.toml
@@ -11,6 +11,7 @@ KomaMRIBase = "d0bc0b20-b151-4d03-b2a4-6ca51751cb9c"
 Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
 ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca"
 Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
+Suppressor = "fd094767-a336-5f1f-9728-57cf17d0bbfb"
 ThreadsX = "ac1d9e8a-700a-412c-b207-f0111f4b6c0d"
 
 [weakdeps]

diff --git a/KomaMRICore/ext/KomaAMDGPUExt.jl b/KomaMRICore/ext/KomaAMDGPUExt.jl
@@ -1,6 +1,7 @@
 module KomaAMDGPUExt
 
 using AMDGPU
+using Suppressor
 import KomaMRICore
 import Adapt
 
@@ -20,6 +21,7 @@ end
 
 function __init__()
     push!(KomaMRICore.LOADED_BACKENDS[], ROCBackend())
+    @suppress AMDGPU.allowscalar(true)
 end
 
 end
diff --git a/KomaMRICore/ext/KomaCUDAExt.jl b/KomaMRICore/ext/KomaCUDAExt.jl
@@ -1,6 +1,7 @@
 module KomaCUDAExt
 
 using CUDA
+using Suppressor
 import KomaMRICore
 import Adapt
 
@@ -19,6 +20,7 @@ end
 
 function __init__()
     push!(KomaMRICore.LOADED_BACKENDS[], CUDABackend())
+    @suppress CUDA.allowscalar(true)
 end
 
 end
diff --git a/KomaMRICore/ext/KomaMetalExt.jl b/KomaMRICore/ext/KomaMetalExt.jl
@@ -3,6 +3,7 @@
 module KomaMetalExt
 
 using Metal
+using Suppressor
 import KomaMRICore
 import Adapt
 
@@ -16,16 +17,9 @@ function KomaMRICore._print_devices(::MetalBackend)
     @info "Metal device type: $(KomaMRICore.device_name(MetalBackend()))"
 end
 
-#Temporary workaround for https://github.com/JuliaGPU/Metal.jl/issues/348
-#Once run_spin_excitation! and run_spin_precession! are kernel-based, this code
-#can be removed
-Base.cumsum(x::MtlVector{T}) where T = convert(MtlVector{T}, cumsum(KomaMRICore.cpu(x)))
-Base.cumsum(x::MtlArray{T}; dims) where T = convert(MtlArray{T}, cumsum(KomaMRICore.cpu(x), dims=dims))
-Base.findall(x::MtlVector{Bool}) = convert(MtlVector, findall(KomaMRICore.cpu(x)))
-
 function __init__()
     push!(KomaMRICore.LOADED_BACKENDS[], MetalBackend())
-    @warn "Metal does not support all array operations used by KomaMRI (https://github.com/JuliaGPU/Metal.jl/issues/348). GPU performance may be slower than expected"
+    @suppress Metal.allowscalar(true)
 end
 
 end

diff --git a/KomaMRICore/ext/KomaoneAPIExt.jl b/KomaMRICore/ext/KomaoneAPIExt.jl
@@ -1,6 +1,7 @@
 module KomaoneAPIExt
 
 using oneAPI
+using Suppressor
 import KomaMRICore
 import Adapt
 
@@ -17,15 +18,9 @@ function KomaMRICore._print_devices(::oneAPIBackend)
     @info "$(length(oneAPI.devices())) oneAPI capable device(s)." devices...
 end
 
-#Temporary workaround since oneAPI.jl (similar to Metal) does not support some array operations
-#Once run_spin_excitation! and run_spin_precession! are kernel-based, this code can be removed
-Base.cumsum(x::oneVector{T}) where T = convert(oneVector{T}, cumsum(KomaMRICore.cpu(x)))
-Base.cumsum(x::oneArray{T}; dims) where T = convert(oneArray{T}, cumsum(KomaMRICore.cpu(x), dims=dims))
-Base.findall(x::oneVector{Bool}) = convert(oneVector, findall(KomaMRICore.cpu(x)))
-
 function __init__()
     push!(KomaMRICore.LOADED_BACKENDS[], oneAPIBackend())
-    @warn "oneAPI does not support all array operations used by KomaMRI. GPU performance may be slower than expected"
+    @suppress oneAPI.allowscalar(true)
 end
 
 end
diff --git a/KomaMRICore/src/simulation/Bloch/BlochDictSimulationMethod.jl b/KomaMRICore/src/simulation/Bloch/BlochDictSimulationMethod.jl
@@ -14,6 +14,30 @@ function sim_output_dim(
     return (sum(seq.ADC.N), length(obj), out_state_dim)
 end
 
+"""Preallocated arrays for use in run_spin_precession."""
+struct BlochDictPrealloc{T} <: PreallocResult{T}
+    Bz_1::AbstractVector{T}
+    Bz_2::AbstractVector{T}
+    ϕ::AbstractVector{T}
+end
+
+Base.view(p::BlochDictPrealloc, i::UnitRange) = begin
+    @views BlochDictPrealloc(
+        p.Bz_1[i],
+        p.Bz_2[i],
+        p.ϕ[i]
+    )
+end
+
+"""BlochDict preallocation function. Returns arrays for use in run_spin_precession."""
+function prealloc(sim_method::BlochDict, obj::Phantom{T}, M::Mag{T}) where {T<:Real}
+    BlochDictPrealloc(
+        similar(obj.x),
+        similar(obj.x),
+        similar(obj.x),
+    )
+end
+
 """
     run_spin_precession(obj, seq, Xt, sig)
 
@@ -35,33 +59,59 @@ function run_spin_precession!(
     sig::AbstractArray{Complex{T}},
     M::Mag{T},
     sim_method::BlochDict,
-    backend::KA.Backend
+    backend::KA.Backend,
+    prealloc::BlochDictPrealloc
 ) where {T<:Real}
     #Simulation
     #Motion
     x, y, z = get_spin_coords(p.motion, p.x, p.y, p.z, seq.t')
-    #Effective field
-    Bz = x .* seq.Gx' .+ y .* seq.Gy' .+ z .* seq.Gz' .+ p.Δw / T(2π * γ)
-    #Rotation
-    if is_ADC_on(seq)
-        ϕ = T(-2π * γ) .* KomaMRIBase.cumtrapz(seq.Δt', Bz, backend)
-    else
-        ϕ = T(-2π * γ) .* trapz(seq.Δt', Bz)
+
+    #Initialize arrays
+    Bz_1 = prealloc.Bz_1
+    Bz_2 = prealloc.Bz_2
+    ϕ = prealloc.ϕ
+    fill!(ϕ, zero(T))
+    Bz_1 .= x[:,1] .* seq.Gx[1] .+ y[:,1] .* seq.Gy[1] .+ z[:,1] .* seq.Gz[1] .+ p.Δw / T(2π * γ)
+
+    # Fill sig[1] if needed
+    ADC_idx = 1
+    if (seq.ADC[1])
+        sig[1,:,1] .= M.xy
+        if sim_method.save_Mz
+            sig[1,:,2] .= M.z
+        end
+        ADC_idx += 1
     end
-    #Mxy precession and relaxation, and Mz relaxation
-    tp = cumsum(seq.Δt) # t' = t - t0
-    dur = sum(seq.Δt)   # Total length, used for signal relaxation
-    Mxy = [M.xy M.xy .* exp.(-tp' ./ p.T2) .* (cos.(ϕ) .+ im * sin.(ϕ))] #This assumes Δw and T2 are constant in time
-    M.xy .= Mxy[:, end]
-    #Acquired signal
-    sig[:, :, 1] .= transpose(Mxy[:, findall(seq.ADC)])
-
-    if sim_method.save_Mz
-        Mz = [M.z M.z .* exp.(-tp' ./ p.T1) .+ p.ρ .* (1 .- exp.(-tp' ./ p.T1))] #Calculate intermediate points
-        sig[:, :, 2] .= transpose(Mz[:, findall(seq.ADC)]) #Save state to signal
-        M.z .= Mz[:, end]
-    else
-        M.z .= M.z .* exp.(-dur ./ p.T1) .+ p.ρ .* (1 .- exp.(-dur ./ p.T1)) #Jump to the last point
+
+    t_seq = zero(T) # Time
+    for seq_idx=2:length(seq.t)
+        t_seq += seq.Δt[seq_idx-1]
+
+        #Effective Field
+        if size(x,2) > 1 #Motion
+            Bz_2 .= x[:,seq_idx] .* seq.Gx[seq_idx] .+ y[:,seq_idx] .* seq.Gy[seq_idx] .+ z[:,seq_idx] .* seq.Gz[seq_idx] .+ p.Δw / T(2π * γ)
+        else             #No motion
+            Bz_2 .= x .* seq.Gx[seq_idx] .+ y .* seq.Gy[seq_idx] .+ z.* seq.Gz[seq_idx] .+ p.Δw / T(2π * γ)
+        end
+
+        #Rotation
+        ϕ .= ϕ .+ (Bz_1 .+ Bz_2) .* (T(-2π * γ) * seq.Δt[seq_idx-1] / 2)
+
+        #Acquired Signal
+        if seq_idx <= length(seq.ADC) && any(seq.ADC[seq_idx,:])
+            sig[ADC_idx,:,1] .= M.xy .* exp.(-t_seq ./ p.T2) .* (cos.(ϕ) .+ im * sin.(ϕ))
+            if sim_method.save_Mz
+                sig[ADC_idx,:,2] .= M.z .* exp.(-t_seq ./ p.T1) .+ p.ρ .* (1 .- exp.(-t_seq ./ p.T1))
+            end
+            ADC_idx += 1
+        end
+
+        Bz_1, Bz_2 = Bz_2, Bz_1
     end
+
+    #Final Spin-State
+    M.xy .= M.xy .* exp.(-t_seq ./ p.T2) .* (cos.(ϕ) .+ im * sin.(ϕ))
+    M.z .= M.z .* exp.(-t_seq ./ p.T1) .+ p.ρ .* (1 .- exp.(-t_seq ./ p.T1))
+
     return nothing
 end
diff --git a/KomaMRICore/src/simulation/Bloch/BlochSimulationMethod.jl b/KomaMRICore/src/simulation/Bloch/BlochSimulationMethod.jl
@@ -22,6 +22,33 @@ function initialize_spins_state(
     return Xt, obj
 end
 
+"""Preallocated arrays for use in run_spin_precession."""
+struct BlochPrealloc{T} <: PreallocResult{T}
+    Bz_1::AbstractVector{T}
+    Bz_2::AbstractVector{T}
+    ϕ::AbstractVector{T}
+    Mxy::AbstractVector{Complex{T}}
+end
+
+Base.view(p::BlochPrealloc, i::UnitRange) = begin
+    @views BlochPrealloc(
+        p.Bz_1[i],
+        p.Bz_2[i],
+        p.ϕ[i],
+        p.Mxy[i]
+    )
+end
+
+"""Default preallocation function. Returns arrays for use in run_spin_precession."""
+function prealloc(sim_method::SimulationMethod, obj::Phantom{T}, M::Mag{T}) where {T<:Real}
+    BlochPrealloc(
+        similar(obj.x),
+        similar(obj.x),
+        similar(obj.x),
+        similar(M.xy)
+    )
+end
+
 """
     run_spin_precession(obj, seq, Xt, sig)
 
@@ -43,27 +70,55 @@ function run_spin_precession!(
     sig::AbstractArray{Complex{T}},
     M::Mag{T},
     sim_method::SimulationMethod,
-    backend::KA.Backend
+    backend::KA.Backend,
+    prealloc::BlochPrealloc
 ) where {T<:Real}
     #Simulation
     #Motion
     x, y, z = get_spin_coords(p.motion, p.x, p.y, p.z, seq.t')
-    #Effective field
-    Bz = x .* seq.Gx' .+ y .* seq.Gy' .+ z .* seq.Gz' .+ p.Δw / T(2π * γ)
-    #Rotation
-    if is_ADC_on(seq)
-        ϕ = T(-2π * γ) .* KomaMRIBase.cumtrapz(seq.Δt', Bz, backend)
-    else
-        ϕ = T(-2π * γ) .* trapz(seq.Δt', Bz)
+
+    #Initialize arrays
+    Bz_1 = prealloc.Bz_1
+    Bz_2 = prealloc.Bz_2
+    ϕ = prealloc.ϕ
+    Mxy = prealloc.Mxy
+    fill!(ϕ, zero(T))
+    Bz_1 .= x[:,1] .* seq.Gx[1] .+ y[:,1] .* seq.Gy[1] .+ z[:,1] .* seq.Gz[1] .+ p.Δw / T(2π * γ)
+
+    # Fill sig[1] if needed
+    ADC_idx = 1
+    if (seq.ADC[1])
+        sig[1] = sum(M.xy)
+        ADC_idx += 1
     end
-    #Mxy precession and relaxation, and Mz relaxation
-    tp   = cumsum(seq.Δt) # t' = t - t0
-    dur  = sum(seq.Δt)   # Total length, used for signal relaxation
-    Mxy = [M.xy M.xy .* exp.(-tp' ./ p.T2) .* (cos.(ϕ) .+ im * sin.(ϕ))] #This assumes Δw and T2 are constant in time
-    M.xy .= Mxy[:, end]
-    M.z  .= M.z .* exp.(-dur ./ p.T1) .+ p.ρ .* (1 .- exp.(-dur ./ p.T1))
-    #Acquired signal
-    sig .= transpose(sum(Mxy[:, findall(seq.ADC)]; dims=1)) #<--- TODO: add coil sensitivities
+
+    t_seq = zero(T) # Time
+    for seq_idx=2:length(seq.t)
+        t_seq += seq.Δt[seq_idx-1]
+
+        #Effective Field
+        if size(x,2) > 1 #Motion
+            Bz_2 .= x[:,seq_idx] .* seq.Gx[seq_idx] .+ y[:,seq_idx] .* seq.Gy[seq_idx] .+ z[:,seq_idx] .* seq.Gz[seq_idx] .+ p.Δw / T(2π * γ)
+        else             #No motion
+            Bz_2 .= x .* seq.Gx[seq_idx] .+ y .* seq.Gy[seq_idx] .+ z.* seq.Gz[seq_idx] .+ p.Δw / T(2π * γ)
+        end
+
+        #Rotation
+        ϕ .= ϕ .+ (Bz_1 .+ Bz_2) .* (T(-2π * γ) * seq.Δt[seq_idx-1] / 2)
+
+        #Acquired Signal
+        if seq_idx <= length(seq.ADC) && any(seq.ADC[seq_idx,:])
+            Mxy .= M.xy .* exp.([-t_seq] ./ p.T2) .* (cos.(ϕ) .+ im * sin.(ϕ))
+            sig[ADC_idx] = sum(Mxy) 
+            ADC_idx += 1
+        end
+
+        Bz_1, Bz_2 = Bz_2, Bz_1
+    end
+
+    #Final Spin-State
+    M.xy .= M.xy .* exp.(-t_seq ./ p.T2) .* (cos.(ϕ) .+ im * sin.(ϕ))
+    M.z .= M.z .* exp.(-t_seq ./ p.T1) .+ p.ρ .* (1 .- exp.(-t_seq ./ p.T1))
 
     return nothing
 end

diff --git a/KomaMRICore/src/simulation/Bloch/KernelFunctions.jl b/KomaMRICore/src/simulation/Bloch/KernelFunctions.jl