SciML · ParasPuneetSingh · Jun 2, 2025 · Jun 2, 2025 · Jun 2, 2025 · Jun 22, 2025
diff --git a/docs/src/optimization_packages/ode.md b/docs/src/optimization_packages/ode.md
@@ -0,0 +1,55 @@
+# OptimizationODE.jl
+
+**OptimizationODE.jl** provides ODE-based optimization methods as a solver plugin for [SciML's Optimization.jl](https://github.com/SciML/Optimization.jl). It wraps various ODE solvers to perform gradient-based optimization using continuous-time dynamics.
+
+## Installation
+
+```julia
+using Pkg
+Pkg.add(url="OptimizationODE.jl")
+```
+
+## Usage
+
+```julia
+using OptimizationODE, Optimization, ADTypes, SciMLBase
+
+function f(x, p)
+    return sum(abs2, x)
+end
+
+function g!(g, x, p)
+    @. g = 2 * x
+end
+
+x0 = [2.0, -3.0]
+p = []
+
+f_manual = OptimizationFunction(f, SciMLBase.NoAD(); grad = g!)
+prob_manual = OptimizationProblem(f_manual, x0)
+
+opt = ODEGradientDescent(dt=0.01)
+sol = solve(prob_manual, opt; maxiters=50_000)
+
+@show sol.u
+@show sol.objective
+```
+
+## Local Gradient-based Optimizers
+
+All provided optimizers are **gradient-based local optimizers** that solve optimization problems by integrating gradient-based ODEs to convergence:
+
+* `ODEGradientDescent(dt=...)` — performs basic gradient descent using the explicit Euler method. This is a simple and efficient method suitable for small-scale or well-conditioned problems.
+
+* `RKChebyshevDescent()` — uses the ROCK2 solver, a stabilized explicit Runge-Kutta method suitable for stiff problems. It allows larger step sizes while maintaining stability.
+
+* `RKAccelerated()` — leverages the Tsit5 method, a 5th-order Runge-Kutta solver that achieves faster convergence for smooth problems by improving integration accuracy.
+
+* `HighOrderDescent()` — applies Vern7, a high-order (7th-order) explicit Runge-Kutta method for even more accurate integration. This can be beneficial for problems requiring high precision.
+
+You can also define a custom optimizer using the generic `ODEOptimizer(solver; dt=nothing)` constructor by supplying any ODE solver supported by [OrdinaryDiffEq.jl](https://docs.sciml.ai/DiffEqDocs/stable/solvers/ode_solve/).
+
+## Interface Details
+
+All optimizers require gradient information (either via automatic differentiation or manually provided `grad!`). The optimization is performed by integrating the ODE defined by the negative gradient until a steady state is reached.
+
diff --git a/lib/OptimizationODE/Project.toml b/lib/OptimizationODE/Project.toml
@@ -6,10 +6,14 @@ version = "0.1.0"
 [deps]
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 Optimization = "7f7a1694-90dd-40f0-9382-eb1efda571ba"
+LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed"
 Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
 SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462"
 SteadyStateDiffEq = "9672c7b4-1e72-59bd-8a11-6ac3964bc41f"
+NonlinearSolve = "8913a72c-1f9b-4ce2-8d82-65094dcecaec"
+Sundials = "c3572dad-4567-51f8-b174-8c6c989267f4"
+DifferentialEquations = "0c46a032-eb83-5123-abaf-570d42b7fbaa"
 
 [compat]
 ForwardDiff = "0.10, 1"

diff --git a/lib/OptimizationODE/src/OptimizationODE.jl b/lib/OptimizationODE/src/OptimizationODE.jl
@@ -2,58 +2,123 @@ module OptimizationODE
 
 using Reexport
 @reexport using Optimization, SciMLBase
-using OrdinaryDiffEq, SteadyStateDiffEq
+using LinearAlgebra, ForwardDiff
+
+using NonlinearSolve
+using OrdinaryDiffEq, DifferentialEquations, SteadyStateDiffEq, Sundials
 
 export ODEOptimizer, ODEGradientDescent, RKChebyshevDescent, RKAccelerated, HighOrderDescent
+export DAEOptimizer, DAEMassMatrix, DAEIndexing
+
+struct ODEOptimizer{T}
+    solver::T
+end
 
-struct ODEOptimizer{T, T2}
+ODEGradientDescent() = ODEOptimizer(Euler())
+RKChebyshevDescent() = ODEOptimizer(ROCK2())
+RKAccelerated() = ODEOptimizer(Tsit5())
+HighOrderDescent() = ODEOptimizer(Vern7())
+
+struct DAEOptimizer{T}
     solver::T
-    dt::T2
 end
-ODEOptimizer(solver ; dt=nothing) = ODEOptimizer(solver, dt)
 
-# Solver Constructors (users call these)
-ODEGradientDescent(; dt)   = ODEOptimizer(Euler(); dt)
-RKChebyshevDescent()   = ODEOptimizer(ROCK2())
-RKAccelerated()        = ODEOptimizer(Tsit5())
-HighOrderDescent()  = ODEOptimizer(Vern7())
+DAEMassMatrix() = DAEOptimizer(Rosenbrock23(autodiff = false))
+DAEIndexing() = DAEOptimizer(IDA())
 
 
-SciMLBase.requiresbounds(::ODEOptimizer)              = false
-SciMLBase.allowsbounds(::ODEOptimizer)                = false
-SciMLBase.allowscallback(::ODEOptimizer)              = true
+SciMLBase.requiresbounds(::ODEOptimizer) = false
+SciMLBase.allowsbounds(::ODEOptimizer) = false
+SciMLBase.allowscallback(::ODEOptimizer) = true
 SciMLBase.supports_opt_cache_interface(::ODEOptimizer) = true
-SciMLBase.requiresgradient(::ODEOptimizer)            = true
-SciMLBase.requireshessian(::ODEOptimizer)             = false
-SciMLBase.requiresconsjac(::ODEOptimizer)             = false
-SciMLBase.requiresconshess(::ODEOptimizer)            = false
+SciMLBase.requiresgradient(::ODEOptimizer) = true
+SciMLBase.requireshessian(::ODEOptimizer) = false
+SciMLBase.requiresconsjac(::ODEOptimizer) = false
+SciMLBase.requiresconshess(::ODEOptimizer) = false
+
+
+SciMLBase.requiresbounds(::DAEOptimizer) = false
+SciMLBase.allowsbounds(::DAEOptimizer) = false
+SciMLBase.allowsconstraints(::DAEOptimizer) = true
+SciMLBase.allowscallback(::DAEOptimizer) = true
+SciMLBase.supports_opt_cache_interface(::DAEOptimizer) = true
+SciMLBase.requiresgradient(::DAEOptimizer) = true
+SciMLBase.requireshessian(::DAEOptimizer) = false
+SciMLBase.requiresconsjac(::DAEOptimizer) = true
+SciMLBase.requiresconshess(::DAEOptimizer) = false
 
 
 function SciMLBase.__init(prob::OptimizationProblem, opt::ODEOptimizer;
-    callback=Optimization.DEFAULT_CALLBACK, progress=false,
+    callback=Optimization.DEFAULT_CALLBACK, progress=false, dt=nothing,
     maxiters=nothing, kwargs...)
-
-    return OptimizationCache(prob, opt; callback=callback, progress=progress,
+    return OptimizationCache(prob, opt; callback=callback, progress=progress, dt=dt,
         maxiters=maxiters, kwargs...)
 end
 
-function SciMLBase.__solve(
-  cache::OptimizationCache{F,RC,LB,UB,LC,UC,S,O,D,P,C}
-  ) where {F,RC,LB,UB,LC,UC,S,O<:ODEOptimizer,D,P,C}
+function SciMLBase.__init(prob::OptimizationProblem, opt::DAEOptimizer;
+    callback=Optimization.DEFAULT_CALLBACK, progress=false, dt=nothing,
+    maxiters=nothing, differential_vars=nothing, kwargs...)
+    return OptimizationCache(prob, opt; callback=callback, progress=progress, dt=dt,
+        maxiters=maxiters, differential_vars=differential_vars, kwargs...)
+end
 
-    dt    = cache.opt.dt
-    maxit = get(cache.solver_args, :maxiters, 1000)
 
+function handle_parameters(p)
+    if p isa SciMLBase.NullParameters
+        return Float64[]
+    else
+        return p
+    end
+end
+
+function setup_progress_callback(cache, solve_kwargs)
+    if get(cache.solver_args, :progress, false)
+        condition = (u, t, integrator) -> true
+        affect! = (integrator) -> begin
+            u_opt = integrator.u isa AbstractArray ? integrator.u : integrator.u.u
+            cache.solver_args[:callback](u_opt, integrator.p, integrator.t)
+        end
+        cb = DiscreteCallback(condition, affect!)
+        solve_kwargs[:callback] = cb
+    end
+    return solve_kwargs
+end
+
+
+function SciMLBase.__solve(
+    cache::OptimizationCache{F,RC,LB,UB,LC,UC,S,O,D,P,C}
+    ) where {F,RC,LB,UB,LC,UC,S,O<:Union{ODEOptimizer,DAEOptimizer},D,P,C}
+
+    dt = get(cache.solver_args, :dt, nothing)
+    maxit = get(cache.solver_args, :maxiters, nothing)
+    differential_vars = get(cache.solver_args, :differential_vars, nothing)
     u0 = copy(cache.u0)
-    p  = cache.p
+    p = handle_parameters(cache.p)  # Properly handle NullParameters
 
+    if cache.opt isa ODEOptimizer
+        return solve_ode(cache, dt, maxit, u0, p)
+    else
+        if cache.opt.solver == Rosenbrock23(autodiff = false)
+            return solve_dae_mass_matrix(cache, dt, maxit, u0, p)
+        else
+            return solve_dae_indexing(cache, dt, maxit, u0, p, differential_vars)
+        end
+    end
+end
+
+function solve_ode(cache, dt, maxit, u0, p)
     if cache.f.grad === nothing
         error("ODEOptimizer requires a gradient. Please provide a function with `grad` defined.")
     end
 
     function f!(du, u, p, t)
-        cache.f.grad(du, u, p)
-        @. du = -du
+        grad_vec = similar(u)
+        if isempty(p)
+            cache.f.grad(grad_vec, u)
+        else
+            cache.f.grad(grad_vec, u, p)
+        end
+        @. du = -grad_vec
         return nothing
     end
 
@@ -62,14 +127,11 @@ function SciMLBase.__solve(
     algorithm = DynamicSS(cache.opt.solver)
 
     cb = cache.callback
-    if cb != Optimization.DEFAULT_CALLBACK || get(cache.solver_args,:progress,false) === true
-        function condition(u, t, integrator)
-            true
-        end
+    if cb != Optimization.DEFAULT_CALLBACK || get(cache.solver_args,:progress,false)
+        function condition(u, t, integrator) true end
         function affect!(integrator)
             u_now = integrator.u
-            state = Optimization.OptimizationState(u=u_now, objective=cache.f(integrator.u, integrator.p))
-            Optimization.callback_function(cb, state)
+            cache.callback(u_now, integrator.p, integrator.t)
         end
         cb_struct = DiscreteCallback(condition, affect!)
         callback = CallbackSet(cb_struct)
@@ -86,21 +148,138 @@ function SciMLBase.__solve(
     end
 
     sol = solve(ss_prob, algorithm; solve_kwargs...)
-has_destats = hasproperty(sol, :destats)
-has_t = hasproperty(sol, :t) && !isempty(sol.t)
+    has_destats = hasproperty(sol, :destats)
+    has_t = hasproperty(sol, :t) && !isempty(sol.t)
 
-stats = Optimization.OptimizationStats(
-    iterations = has_destats ? get(sol.destats, :iters, 10) : (has_t ? length(sol.t) - 1 : 10),
-    time = has_t ? sol.t[end] : 0.0,
-    fevals = has_destats ? get(sol.destats, :f_calls, 0) : 0,
-    gevals = has_destats ? get(sol.destats, :iters, 0) : 0,
-    hevals = 0
-)
+    stats = Optimization.OptimizationStats(
+        iterations = has_destats ? get(sol.destats, :iters, 10) : (has_t ? length(sol.t) - 1 : 10),
+        time = has_t ? sol.t[end] : 0.0,
+        fevals = has_destats ? get(sol.destats, :f_calls, 0) : 0,
+        gevals = has_destats ? get(sol.destats, :iters, 0) : 0,
+        hevals = 0
+    )
 
     SciMLBase.build_solution(cache, cache.opt, sol.u, cache.f(sol.u, p);
         retcode = ReturnCode.Success,
         stats = stats
     )
 end
 
+function solve_dae_mass_matrix(cache, dt, maxit, u0, p)
+    if cache.f.cons === nothing
+        return solve_ode(cache, dt, maxit, u0, p)
+    end
+    x=u0
+    cons_vals = cache.f.cons(x, p)
+    n = length(u0)
+    m = length(cons_vals)
+    u0_extended = vcat(u0, zeros(m))
+    M = Diagonal(ones(n + m))
+
+
+    function f_mass!(du, u, p_, t)
+        x = @view u[1:n]
+        λ = @view u[n+1:end]
+        grad_f = similar(x)
+        if cache.f.grad !== nothing
+            cache.f.grad(grad_f, x, p_)
+        else
+            grad_f .= ForwardDiff.gradient(z -> cache.f.f(z, p_), x)
+        end
+        J = Matrix{eltype(x)}(undef, m, n)
+        cache.f.cons_j !== nothing && cache.f.cons_j(J, x)
+
+        @. du[1:n] = -grad_f - (J' * λ)
+        consv = cache.f.cons(x, p_)
+        @. du[n+1:end] = consv
+        return nothing
+    end
+
+    if m == 0
+        optf = ODEFunction(f_mass!)
+        prob = ODEProblem(optf, u0, (0.0, 1.0), p)
+        return solve(prob, cache.opt.solver; dt=dt, maxiters=maxit)
+    end
+
+    ss_prob = SteadyStateProblem(ODEFunction(f_mass!, mass_matrix = M), u0_extended, p)
+
+    solve_kwargs = setup_progress_callback(cache, Dict())
+    if maxit !== nothing; solve_kwargs[:maxiters] = maxit; end
+    if dt   !== nothing; solve_kwargs[:dt] = dt; end
+
+    sol = solve(ss_prob, DynamicSS(cache.opt.solver); solve_kwargs...)
+    # if sol.retcode ≠ ReturnCode.Success
+    #     # you may still accept Default or warn
+    # end
+    u_ext = sol.u  
+    u_final = u_ext[1:n]
+    return SciMLBase.build_solution(cache, cache.opt, u_final, cache.f(u_final, p);
+        retcode = sol.retcode)
+end
+
+
+function solve_dae_indexing(cache, dt, maxit, u0, p, differential_vars)
+    if cache.f.cons === nothing
+        return solve_ode(cache, dt, maxit, u0, p)
+    end
+    x=u0
+    cons_vals = cache.f.cons(x, p)
+    n = length(u0)
+    m = length(cons_vals)
+    u0_ext = vcat(u0, zeros(m))
+    du0_ext = zeros(n + m)
+
+    if differential_vars === nothing
+        differential_vars = vcat(fill(true, n), fill(false, m))
+    else
+        if length(differential_vars) == n
+            differential_vars = vcat(differential_vars, fill(false, m))
+        elseif length(differential_vars) == n + m
+            # use as is
+        else
+            error("differential_vars length must be number of variables ($n) or extended size ($(n+m))")
+        end
+    end
+
+    function dae_residual!(res, du, u, p_, t)
+        x = @view u[1:n]
+        λ = @view u[n+1:end]
+        du_x = @view du[1:n]
+        grad_f = similar(x)
+        cache.f.grad(grad_f, x, p_)
+        J = zeros(m, n)
+        cache.f.cons_j !== nothing && cache.f.cons_j(J, x)
+
+        @. res[1:n] = du_x + grad_f + J' * λ
+        consv = cache.f.cons(x, p_)
+        @. res[n+1:end] = consv
+        return nothing
+    end
+
+    if m == 0
+        optf = ODEFunction(dae_residual!, differential_vars = differential_vars)
+        prob = ODEProblem(optf, du0_ext, (0.0, 1.0), p)
+        return solve(prob, HighOrderDescent(); dt=dt, maxiters=maxit)
+    end
+
+    tspan = (0.0, 10.0)
+    prob = DAEProblem(dae_residual!, du0_ext, u0_ext, tspan, p;
+                      differential_vars = differential_vars)
+
+    solve_kwargs = setup_progress_callback(cache, Dict())
+    if maxit !== nothing; solve_kwargs[:maxiters] = maxit; end
+    if dt !== nothing; solve_kwargs[:dt] = dt; end
+    if hasfield(typeof(cache.opt.solver), :initializealg)
+        solve_kwargs[:initializealg] = BrownFullBasicInit()
+    end
+
+    sol = solve(prob, cache.opt.solver; solve_kwargs...)
+    u_ext = sol.u
+    u_final = u_ext[end][1:n]
+
+    return SciMLBase.build_solution(cache, cache.opt, u_final, cache.f(u_final, p);
+        retcode = sol.retcode)
 end
+
+
+end