diff --git a/KomaMRICore/src/datatypes/Spinor.jl b/KomaMRICore/src/datatypes/Spinor.jl
index 803578081..8d8977bbb 100644
--- a/KomaMRICore/src/datatypes/Spinor.jl
+++ b/KomaMRICore/src/datatypes/Spinor.jl
@@ -31,6 +31,7 @@ Spinor(α::Complex{T}, β::Complex{T}) where {T<:Real} = Spinor([α], [β])
 Spinor(α::T, β::T) where {T<:Real} = Spinor([complex(α)], [complex(β)])
 one(T::Spinor) = Spinor(1.,0.)
 Base.getindex(s::Spinor, i) = Spinor(s.α[i], s.β[i])
+Base.view(s::Spinor, i::UnitRange) = @views Spinor(s.α[i], s.β[i])
 """
     str = show(io::IO, s::Spinor)
 
diff --git a/KomaMRICore/src/simulation/SimMethods/Bloch/BlochCPU.jl b/KomaMRICore/src/simulation/SimMethods/Bloch/BlochCPU.jl
new file mode 100644
index 000000000..de59c88ba
--- /dev/null
+++ b/KomaMRICore/src/simulation/SimMethods/Bloch/BlochCPU.jl
@@ -0,0 +1,151 @@
+"""Stores preallocated structs for use in Bloch CPU run_spin_precession function."""
+struct BlochCPUPrealloc{T} <: PreallocResult{T}
+    M::Mag{T}                               # Mag{T}
+    Bz_old::AbstractVector{T}               # Vector{T}(Nspins x 1)
+    Bz_new::AbstractVector{T}               # Vector{T}(Nspins x 1)
+    ϕ::AbstractVector{T}                    # Vector{T}(Nspins x 1)
+    φ::AbstractVector{T}                    # Vector{T}(Nspins x 1)
+    Rot::Spinor{T}                          # Spinor{T}
+end
+
+Base.view(p::BlochCPUPrealloc, i::UnitRange) = begin
+    @views BlochCPUPrealloc(
+        p.M[i],
+        p.Bz_old[i],
+        p.Bz_new[i],
+        p.ϕ[i],
+        p.φ[i],
+        p.Rot[i]
+    )
+end
+
+"""Preallocates arrays for use in run_spin_precession."""
+function prealloc(sim_method::Bloch, backend::KA.CPU, obj::Phantom{T}, M::Mag{T}) where {T<:Real}
+    return BlochCPUPrealloc(
+        Mag(
+            similar(M.xy),
+            similar(M.z)
+        ),
+        similar(obj.x),
+        similar(obj.x),
+        similar(obj.x),
+        similar(obj.x),
+        Spinor(
+            similar(M.xy),
+            similar(M.xy)
+        )
+    )
+end
+
+"""
+    run_spin_precession(obj, seq, Xt, sig, M, sim_method, backend, prealloc)
+
+Alternate implementation of the run_spin_precession! function in BlochSimpleSimulationMethod.jl 
+optimized for the CPU. Uses a loop to step through time instead of allocating a matrix of size 
+NSpins x seq.t. The Bz_old, Bz_new, ϕ, and Mxy arrays are pre-allocated in run_sim_time_iter! so 
+that they can be re-used from block to block.
+"""
+function run_spin_precession!(
+    p::Phantom{T},
+    seq::DiscreteSequence{T},
+    sig::AbstractArray{Complex{T}},
+    M::Mag{T},
+    sim_method::Bloch,
+    backend::KA.CPU,
+    prealloc::BlochCPUPrealloc
+) where {T<:Real}
+    #Simulation
+    #Motion
+    x, y, z = get_spin_coords(p.motion, p.x, p.y, p.z, seq.t[1,:]')
+    
+    #Initialize arrays
+    Bz_old = prealloc.Bz_old
+    Bz_new = prealloc.Bz_new
+    ϕ = prealloc.ϕ
+    Mxy = prealloc.M.xy
+    fill!(ϕ, zero(T))
+    @. Bz_old = x[:,1] * seq.Gx[1] + y[:,1] * seq.Gy[1] + z[:,1] * seq.Gz[1] + p.Δw / T(2π * γ)
+
+    # Fill sig[1] if needed
+    ADC_idx = 1
+    if (seq.ADC[1])
+        sig[1] = sum(M.xy)
+        ADC_idx += 1
+    end
+
+    t_seq = zero(T) # Time
+    for seq_idx=2:length(seq.t)
+        x, y, z = get_spin_coords(p.motion, p.x, p.y, p.z, seq.t[seq_idx,:]')
+        t_seq += seq.Δt[seq_idx-1]
+
+        #Effective Field
+        @. Bz_new = x * seq.Gx[seq_idx] + y * seq.Gy[seq_idx] + z * seq.Gz[seq_idx] + p.Δw / T(2π * γ)
+        
+        #Rotation
+        @. ϕ += (Bz_old + Bz_new) * T(-π * γ) * seq.Δt[seq_idx-1]
+
+        #Acquired Signal
+        if seq_idx <= length(seq.ADC) && seq.ADC[seq_idx]
+            @. Mxy = exp(-t_seq / p.T2) * M.xy * cis(ϕ)
+            sig[ADC_idx] = sum(Mxy) 
+            ADC_idx += 1
+        end
+
+        Bz_old, Bz_new = Bz_new, Bz_old
+    end
+
+    #Final Spin-State
+    @. M.xy = M.xy * exp(-t_seq / p.T2) * cis(ϕ)
+    @. M.z = M.z * exp(-t_seq / p.T1) + p.ρ * (T(1) - exp(-t_seq / p.T1))
+
+    return nothing
+end
+
+"""
+    run_spin_excitation!(obj, seq, sig, M, sim_method, backend, prealloc)
+
+Alternate implementation of the run_spin_excitation! function in BlochSimpleSimulationMethod.jl 
+optimized for the CPU. Uses preallocation for all arrays to reduce memory usage.
+"""
+function run_spin_excitation!(
+    p::Phantom{T},
+    seq::DiscreteSequence{T},
+    sig::AbstractArray{Complex{T}},
+    M::Mag{T},
+    sim_method::Bloch,
+    backend::KA.CPU,
+    prealloc::BlochCPUPrealloc
+) where {T<:Real}
+    ΔBz = prealloc.Bz_old
+    Bz = prealloc.Bz_new
+    B = prealloc.ϕ
+    φ = prealloc.φ
+    α = prealloc.Rot.α
+    β = prealloc.Rot.β
+    Maux_xy = prealloc.M.xy
+    Maux_z = prealloc.M.z
+
+    #Can be calculated outside of loop
+    @. ΔBz = p.Δw / T(2π * γ)
+
+    #Simulation
+    for s in seq #This iterates over seq, "s = seq[i,:]"
+        #Motion
+        x, y, z = get_spin_coords(p.motion, p.x, p.y, p.z, s.t)
+        #Effective field
+        @. Bz = (s.Gx * x + s.Gy * y + s.Gz * z) + ΔBz - s.Δf / T(γ) # ΔB_0 = (B_0 - ω_rf/γ), Need to add a component here to model scanner's dB0(x,y,z)
+        @. B = sqrt(abs(s.B1)^2 + abs(Bz)^2)
+        @. B[B == 0] = eps(T)
+        #Spinor Rotation
+        @. φ = T(-π * γ) * (B * s.Δt) # TODO: Use trapezoidal integration here (?),  this is just Forward Euler 
+        @. α = cos(φ) - Complex{T}(im) * (Bz / B) * sin(φ)
+        @. β = -Complex{T}(im) * (s.B1 / B) * sin(φ)
+        mul!(Spinor(α, β), M, Maux_xy, Maux_z)
+        #Relaxation
+        @. M.xy = M.xy * exp(-s.Δt / p.T2)
+        @. M.z = M.z * exp(-s.Δt / p.T1) + p.ρ * (T(1) - exp(-s.Δt / p.T1))
+    end
+    #Acquired signal
+    #sig .= -1.4im #<-- This was to test if an ADC point was inside an RF block
+    return nothing
+end
\ No newline at end of file
diff --git a/KomaMRICore/src/simulation/Bloch/BlochDictSimulationMethod.jl b/KomaMRICore/src/simulation/SimMethods/BlochDict/BlochDict.jl
similarity index 83%
rename from KomaMRICore/src/simulation/Bloch/BlochDictSimulationMethod.jl
rename to KomaMRICore/src/simulation/SimMethods/BlochDict/BlochDict.jl
index 3cf7abb6c..24bb7b9dc 100644
--- a/KomaMRICore/src/simulation/Bloch/BlochDictSimulationMethod.jl
+++ b/KomaMRICore/src/simulation/SimMethods/BlochDict/BlochDict.jl
@@ -35,23 +35,24 @@ function run_spin_precession!(
     sig::AbstractArray{Complex{T}},
     M::Mag{T},
     sim_method::BlochDict,
-    backend::KA.Backend
+    backend::KA.Backend,
+    prealloc::PreallocResult
 ) where {T<:Real}
     #Simulation
     #Motion
     x, y, z = get_spin_coords(p.motion, p.x, p.y, p.z, seq.t')
     #Effective field
-    Bz = x .* seq.Gx' .+ y .* seq.Gy' .+ z .* seq.Gz' .+ p.Δw / T(2π * γ)
+    Bz = x .* seq.Gx' .+ y .* seq.Gy' .+ z .* seq.Gz' .+ p.Δw ./ T(2π .* γ)
     #Rotation
     if is_ADC_on(seq)
-        ϕ = T(-2π * γ) .* KomaMRIBase.cumtrapz(seq.Δt', Bz, backend)
+        ϕ = T(-2π .* γ) .* KomaMRIBase.cumtrapz(seq.Δt', Bz, backend)
     else
-        ϕ = T(-2π * γ) .* trapz(seq.Δt', Bz)
+        ϕ = T(-2π .* γ) .* trapz(seq.Δt', Bz)
     end
     #Mxy precession and relaxation, and Mz relaxation
     tp = cumsum(seq.Δt) # t' = t - t0
     dur = sum(seq.Δt)   # Total length, used for signal relaxation
-    Mxy = [M.xy M.xy .* exp.(-tp' ./ p.T2) .* (cos.(ϕ) .+ im * sin.(ϕ))] #This assumes Δw and T2 are constant in time
+    Mxy = [M.xy M.xy .* exp.(-tp' ./ p.T2) .* (cos.(ϕ) .+ im .* sin.(ϕ))] #This assumes Δw and T2 are constant in time
     M.xy .= Mxy[:, end]
     #Acquired signal
     sig[:, :, 1] .= transpose(Mxy[:, findall(seq.ADC)])
@@ -64,4 +65,4 @@ function run_spin_precession!(
         M.z .= M.z .* exp.(-dur ./ p.T1) .+ p.ρ .* (1 .- exp.(-dur ./ p.T1)) #Jump to the last point
     end
     return nothing
-end
+end
\ No newline at end of file
diff --git a/KomaMRICore/src/simulation/Bloch/BlochSimulationMethod.jl b/KomaMRICore/src/simulation/SimMethods/BlochSimple/BlochSimple.jl
similarity index 67%
rename from KomaMRICore/src/simulation/Bloch/BlochSimulationMethod.jl
rename to KomaMRICore/src/simulation/SimMethods/BlochSimple/BlochSimple.jl
index a6dabbd0a..3b662cb74 100644
--- a/KomaMRICore/src/simulation/Bloch/BlochSimulationMethod.jl
+++ b/KomaMRICore/src/simulation/SimMethods/BlochSimple/BlochSimple.jl
@@ -1,26 +1,10 @@
-struct Bloch <: SimulationMethod end
+#Simplest sim method, works for GPU and CPU but not optimized for either. Although Bloch()
+#is the simulation method chosen if none is passed, the run_spin_precession! and
+#run_spin_excitation! functions in this file are dispatched to at the most abstract level,
+#so new simulation methods will start by using these functions.
+struct BlochSimple <: SimulationMethod end
 
-export Bloch
-
-include("Magnetization.jl") #Defines Mag <: SpinStateRepresentation
-@functor Mag #Gives gpu acceleration capabilities, see GPUFunctions.jl
-
-function sim_output_dim(
-    obj::Phantom{T}, seq::Sequence, sys::Scanner, sim_method::SimulationMethod
-) where {T<:Real}
-    return (sum(seq.ADC.N), 1) #Nt x Ncoils, This should consider the coil info from sys
-end
-
-"""Magnetization initialization for Bloch simulation method."""
-function initialize_spins_state(
-    obj::Phantom{T}, sim_method::SimulationMethod
-) where {T<:Real}
-    Nspins = length(obj)
-    Mxy = zeros(T, Nspins)
-    Mz = obj.ρ
-    Xt = Mag{T}(Mxy, Mz)
-    return Xt, obj
-end
+export BlochSimple
 
 """
     run_spin_precession(obj, seq, Xt, sig)
@@ -43,23 +27,24 @@ function run_spin_precession!(
     sig::AbstractArray{Complex{T}},
     M::Mag{T},
     sim_method::SimulationMethod,
-    backend::KA.Backend
+    backend::KA.Backend,
+    prealloc::PreallocResult
 ) where {T<:Real}
     #Simulation
     #Motion
     x, y, z = get_spin_coords(p.motion, p.x, p.y, p.z, seq.t')
     #Effective field
-    Bz = x .* seq.Gx' .+ y .* seq.Gy' .+ z .* seq.Gz' .+ p.Δw / T(2π * γ)
+    Bz = x .* seq.Gx' .+ y .* seq.Gy' .+ z .* seq.Gz' .+ p.Δw ./ T(2π .* γ)
     #Rotation
     if is_ADC_on(seq)
-        ϕ = T(-2π * γ) .* KomaMRIBase.cumtrapz(seq.Δt', Bz, backend)
+        ϕ = T(-2π .* γ) .* KomaMRIBase.cumtrapz(seq.Δt', Bz, backend)
     else
-        ϕ = T(-2π * γ) .* trapz(seq.Δt', Bz)
+        ϕ = T(-2π .* γ) .* trapz(seq.Δt', Bz)
     end
     #Mxy precession and relaxation, and Mz relaxation
     tp   = cumsum(seq.Δt) # t' = t - t0
     dur  = sum(seq.Δt)   # Total length, used for signal relaxation
-    Mxy = [M.xy M.xy .* exp.(-tp' ./ p.T2) .* (cos.(ϕ) .+ im * sin.(ϕ))] #This assumes Δw and T2 are constant in time
+    Mxy = [M.xy M.xy .* exp.(-tp' ./ p.T2) .* (cos.(ϕ) .+ im .* sin.(ϕ))] #This assumes Δw and T2 are constant in time
     M.xy .= Mxy[:, end]
     M.z  .= M.z .* exp.(-dur ./ p.T1) .+ p.ρ .* (1 .- exp.(-dur ./ p.T1))
     #Acquired signal
@@ -89,18 +74,20 @@ function run_spin_excitation!(
     sig::AbstractArray{Complex{T}},
     M::Mag{T},
     sim_method::SimulationMethod,
+    backend::KA.Backend,
+    prealloc::PreallocResult
 ) where {T<:Real}
     #Simulation
     for s in seq #This iterates over seq, "s = seq[i,:]"
         #Motion
         x, y, z = get_spin_coords(p.motion, p.x, p.y, p.z, s.t)
         #Effective field
-        ΔBz = p.Δw ./ T(2π * γ) .- s.Δf ./ T(γ) # ΔB_0 = (B_0 - ω_rf/γ), Need to add a component here to model scanner's dB0(x,y,z)
+        ΔBz = p.Δw ./ T(2π .* γ) .- s.Δf ./ T(γ) # ΔB_0 = (B_0 - ω_rf/γ), Need to add a component here to model scanner's dB0(x,y,z)
         Bz = (s.Gx .* x .+ s.Gy .* y .+ s.Gz .* z) .+ ΔBz
         B = sqrt.(abs.(s.B1) .^ 2 .+ abs.(Bz) .^ 2)
         B[B .== 0] .= eps(T)
         #Spinor Rotation
-        φ = T(-2π * γ) * (B .* s.Δt) # TODO: Use trapezoidal integration here (?),  this is just Forward Euler
+        φ = T(-2π .* γ) .* (B .* s.Δt) # TODO: Use trapezoidal integration here (?),  this is just Forward Euler
         mul!(Q(φ, s.B1 ./ B, Bz ./ B), M)
         #Relaxation
         M.xy .= M.xy .* exp.(-s.Δt ./ p.T2)
@@ -109,4 +96,4 @@ function run_spin_excitation!(
     #Acquired signal
     #sig .= -1.4im #<-- This was to test if an ADC point was inside an RF block
     return nothing
-end
+end
\ No newline at end of file
diff --git a/KomaMRICore/src/simulation/Bloch/KernelFunctions.jl b/KomaMRICore/src/simulation/SimMethods/KernelFunctions.jl
similarity index 100%
rename from KomaMRICore/src/simulation/Bloch/KernelFunctions.jl
rename to KomaMRICore/src/simulation/SimMethods/KernelFunctions.jl
diff --git a/KomaMRICore/src/simulation/Bloch/Magnetization.jl b/KomaMRICore/src/simulation/SimMethods/Magnetization.jl
similarity index 66%
rename from KomaMRICore/src/simulation/Bloch/Magnetization.jl
rename to KomaMRICore/src/simulation/SimMethods/Magnetization.jl
index 78552cf71..1ed77bdcf 100644
--- a/KomaMRICore/src/simulation/Bloch/Magnetization.jl
+++ b/KomaMRICore/src/simulation/SimMethods/Magnetization.jl
@@ -45,17 +45,23 @@ Parameter relations for the Shinnar-Le Roux selective excitation pulse design al
 (NMR imaging).
 IEEE Transactions on Medical Imaging, 10(1), 53-65. doi:10.1109/42.75611
 """
-mul!(s::Spinor, M::Mag) = begin
+mul!(s::Spinor{T}, M::Mag) where {T<:Real} = begin
     M_aux = Mag(
-        2*conj.(s.α).*s.β.*M.z.+conj.(s.α).^2 .* M.xy.-s.β.^2 .*conj.(M.xy),
-        (abs.(s.α).^2 .-abs.(s.β).^2).*M.z.-2*real.(s.α.*s.β.*conj.(M.xy))
+        T(2) .*conj.(s.α).*s.β.*M.z.+conj.(s.α).^2 .* M.xy.-s.β.^2 .*conj.(M.xy),
+        (abs.(s.α).^2 .-abs.(s.β).^2).*M.z.-T(2) .*real.(s.α.*s.β.*conj.(M.xy))
      )
     M.xy .= M_aux.xy
     M.z  .= M_aux.z
 end
-*(s::Spinor, M::Mag) = begin
+mul!(s::Spinor{T}, M::Mag, Maux_xy, Maux_z) where {T<:Real} = begin
+    @. Maux_xy = T(2)*conj(s.α)*s.β*M.z+conj(s.α)^2*M.xy-s.β^2*conj(M.xy)
+    @. Maux_z = (abs(s.α)^2 -abs(s.β)^2)*M.z-T(2) *real(s.α*s.β*conj(M.xy))
+    @. M.xy = Maux_xy
+    @. M.z = Maux_z
+end
+*(s::Spinor{T}, M::Mag) where {T<:Real} = begin
     Mag(
-        2*conj.(s.α).*s.β.*M.z.+conj.(s.α).^2 .* M.xy.-s.β.^2 .*conj.(M.xy),
-        (abs.(s.α).^2 .-abs.(s.β).^2).*M.z.-2*real.(s.α.*s.β.*conj.(M.xy))
+        T(2) .*conj.(s.α).*s.β.*M.z.+conj.(s.α).^2 .* M.xy.-s.β.^2 .*conj.(M.xy),
+        (abs.(s.α).^2 .-abs.(s.β).^2).*M.z.-T(2) .*real.(s.α.*s.β.*conj.(M.xy))
      )
 end
diff --git a/KomaMRICore/src/simulation/SimMethods/SimulationMethod.jl b/KomaMRICore/src/simulation/SimMethods/SimulationMethod.jl
new file mode 100644
index 000000000..0fbc236bc
--- /dev/null
+++ b/KomaMRICore/src/simulation/SimMethods/SimulationMethod.jl
@@ -0,0 +1,40 @@
+#Bloch is the default simulation method
+struct Bloch <: SimulationMethod end
+
+export Bloch
+
+include("Magnetization.jl")
+
+@functor Mag #Gives gpu acceleration capabilities, see GPUFunctions.jl
+
+function sim_output_dim(
+    obj::Phantom{T}, seq::Sequence, sys::Scanner, sim_method::SimulationMethod
+) where {T<:Real}
+    return (sum(seq.ADC.N), 1) #Nt x Ncoils, This should consider the coil info from sys
+end
+
+"""Magnetization initialization for Bloch simulation method."""
+function initialize_spins_state(
+    obj::Phantom{T}, sim_method::SimulationMethod
+) where {T<:Real}
+    Nspins = length(obj)
+    Mxy = zeros(T, Nspins)
+    Mz = obj.ρ
+    Xt = Mag{T}(Mxy, Mz)
+    return Xt, obj
+end
+
+abstract type PreallocResult{T<:Real} end
+
+"""Default preallocation struct, stores nothing."""
+struct DefaultPreAlloc{T} <: PreallocResult{T} end
+
+Base.view(p::DefaultPreAlloc, i::UnitRange) = p
+
+"""Default preallocation function."""
+prealloc(sim_method::SimulationMethod, backend::KA.Backend, obj::Phantom{T}, M::Mag{T}) where {T<:Real} = DefaultPreAlloc{T}()
+
+include("KernelFunctions.jl")
+include("BlochSimple/BlochSimple.jl")
+include("Bloch/BlochCPU.jl")
+include("BlochDict/BlochDict.jl")
\ No newline at end of file
diff --git a/KomaMRICore/src/simulation/SimulatorCore.jl b/KomaMRICore/src/simulation/SimulatorCore.jl
index a3040f34b..8a6e9eca3 100644
--- a/KomaMRICore/src/simulation/SimulatorCore.jl
+++ b/KomaMRICore/src/simulation/SimulatorCore.jl
@@ -2,9 +2,7 @@ abstract type SimulationMethod end #get all available types by using subtypes(Ko
 abstract type SpinStateRepresentation{T<:Real} end #get all available types by using subtypes(KomaMRI.SpinStateRepresentation)
 
 #Defined methods:
-include("Bloch/KernelFunctions.jl")
-include("Bloch/BlochSimulationMethod.jl")       #Defines Bloch simulation method
-include("Bloch/BlochDictSimulationMethod.jl")   #Defines BlochDict simulation method
+include("SimMethods/SimulationMethod.jl")  #Defines simulation methods
 
 """
     sim_params = default_sim_params(sim_params=Dict{String,Any}())
@@ -48,7 +46,7 @@ function default_sim_params(sim_params=Dict{String,Any}())
     sampling_params = KomaMRIBase.default_sampling_params()
     get!(sim_params, "gpu", true)
     get!(sim_params, "gpu_device", nothing)
-    get!(sim_params, "Nthreads", sim_params["gpu"] ? 1 : Threads.nthreads())
+    get!(sim_params, "Nthreads", Threads.nthreads())
     get!(sim_params, "Nblocks", 20)
     get!(sim_params, "Δt", sampling_params["Δt"])
     get!(sim_params, "Δt_rf", sampling_params["Δt_rf"])
@@ -84,7 +82,8 @@ function run_spin_precession_parallel!(
     sig::AbstractArray{Complex{T}},
     Xt::SpinStateRepresentation{T},
     sim_method::SimulationMethod,
-    backend::KA.Backend;
+    backend::KA.Backend,
+    prealloc::PreallocResult;
     Nthreads=Threads.nthreads(),
 ) where {T<:Real}
     parts = kfoldperm(length(obj), Nthreads)
@@ -92,7 +91,7 @@ function run_spin_precession_parallel!(
 
     ThreadsX.foreach(enumerate(parts)) do (i, p)
         run_spin_precession!(
-            @view(obj[p]), seq, @view(sig[dims..., i]), @view(Xt[p]), sim_method, backend
+            @view(obj[p]), seq, @view(sig[dims..., i]), @view(Xt[p]), sim_method, backend, @view(prealloc[p])
         )
     end
 
@@ -123,7 +122,9 @@ function run_spin_excitation_parallel!(
     seq::DiscreteSequence{T},
     sig::AbstractArray{Complex{T}},
     Xt::SpinStateRepresentation{T},
-    sim_method::SimulationMethod;
+    sim_method::SimulationMethod,
+    backend::KA.Backend,
+    prealloc::PreallocResult;
     Nthreads=Threads.nthreads(),
 ) where {T<:Real}
     parts = kfoldperm(length(obj), Nthreads)
@@ -131,7 +132,7 @@ function run_spin_excitation_parallel!(
 
     ThreadsX.foreach(enumerate(parts)) do (i, p)
         run_spin_excitation!(
-            @view(obj[p]), seq, @view(sig[dims..., i]), @view(Xt[p]), sim_method
+            @view(obj[p]), seq, @view(sig[dims..., i]), @view(Xt[p]), sim_method, backend, @view(prealloc[p])
         )
     end
 
@@ -180,6 +181,7 @@ function run_sim_time_iter!(
     rfs = 0
     samples = 1
     progress_bar = Progress(Nblocks; desc="Running simulation...")
+    prealloc_result = prealloc(sim_method, backend, obj, Xt) 
 
     for (block, p) in enumerate(parts)
         seq_block = @view seq[p]
@@ -191,12 +193,12 @@ function run_sim_time_iter!(
         # Simulation wrappers
         if excitation_bool[block]
             run_spin_excitation_parallel!(
-                obj, seq_block, @view(sig[acq_samples, dims...]), Xt, sim_method; Nthreads
+                obj, seq_block, @view(sig[acq_samples, dims...]), Xt, sim_method, backend, prealloc_result; Nthreads
             )
             rfs += 1
         else
             run_spin_precession_parallel!(
-                obj, seq_block, @view(sig[acq_samples, dims...]), Xt, sim_method, backend; Nthreads
+                obj, seq_block, @view(sig[acq_samples, dims...]), Xt, sim_method, backend, prealloc_result; Nthreads
             )
         end
         samples += Nadc
@@ -327,6 +329,13 @@ function simulate(
 )
     #Simulation parameter unpacking, and setting defaults if key is not defined
     sim_params = default_sim_params(sim_params)
+    #Warn if user is trying to run on CPU without enabling multi-threading
+    if (!sim_params["gpu"] && Threads.nthreads() == 1)
+        @info """
+            Simulation will be run on the CPU with only 1 thread. To
+            enable multi-threading, start julia with --threads=auto.
+        """ maxlog=1
+    end
     # Simulation init
     seqd = discretize(seq; sampling_params=sim_params) # Sampling of Sequence waveforms
     parts, excitation_bool = get_sim_ranges(seqd; Nblocks=sim_params["Nblocks"]) # Generating simulation blocks
@@ -336,9 +345,12 @@ function simulate(
     Xt, obj = initialize_spins_state(obj, sim_params["sim_method"])
     # Signal init
     Ndims = sim_output_dim(obj, seq, sys, sim_params["sim_method"])
-    sig = zeros(ComplexF64, Ndims..., sim_params["Nthreads"])
     backend = get_backend(sim_params["gpu"])
     sim_params["gpu"] &= backend isa KA.GPU
+    if sim_params["gpu"]
+        sim_params["Nthreads"] = 1
+    end
+    sig = zeros(ComplexF64, Ndims..., sim_params["Nthreads"])
     if !KA.supports_float64(backend) && sim_params["precision"] == "f64"
         sim_params["precision"] = "f32"
         @info """ Backend: '$(name(backend))' does not support 64-bit precision