diff --git a/HISTORY.md b/HISTORY.md index 038968ef..0199ae86 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,5 +1,9 @@ # AdvancedHMC Changelog +## 0.9.0 + + - The parameter ordering for `neg_energy` has changed to improve consistency. Use `neg_energy(h, θ, r)` instead of `neg_energy(h, r, θ)`, to compute the negative energy of a Hamiltonian `h` for parameter `θ` and momentum `r`, This is now consistent with the rest of the AHMC interface, like `phasepoint(h, θ, r)`. + ## 0.8.0 - To make an MCMC transtion from phasepoint `z` using trajectory `τ`(or HMCKernel `κ`) under Hamiltonian `h`, use `transition(h, τ, z)` or `transition(rng, h, τ, z)`(if using HMCKernel, use `transition(h, κ, z)` or `transition(rng, h, κ, z)`). diff --git a/Project.toml b/Project.toml index 599dae3c..77e85396 100644 --- a/Project.toml +++ b/Project.toml @@ -1,6 +1,6 @@ name = "AdvancedHMC" uuid = "0bf59076-c3b1-5ca4-86bd-e02cd72cde3d" -version = "0.8.0" +version = "0.9.0" [deps] AbstractMCMC = "80f14c24-f653-4e6a-9b94-39d6b0f70001" diff --git a/docs/Project.toml b/docs/Project.toml index c48bd544..b43b5188 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -4,6 +4,6 @@ Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" DocumenterCitations = "daee34ce-89f3-4625-b898-19384cb65244" [compat] -AdvancedHMC = "0.8" +AdvancedHMC = "0.9" Documenter = "1" DocumenterCitations = "1" \ No newline at end of file diff --git a/src/hamiltonian.jl b/src/hamiltonian.jl index f7f23b31..8e7ba93a 100644 --- a/src/hamiltonian.jl +++ b/src/hamiltonian.jl @@ -67,11 +67,12 @@ function ∂H∂r(h::Hamiltonian{<:DenseEuclideanMetric,<:GaussianKinetic}, r::A return M⁻¹ * r end -# TODO (kai) make the order of θ and r consistent with neg_energy # TODO (kai) add stricter types to block hamiltonian.jl#L37 from working on unknown metric/kinetic # The gradient of a position-dependent Hamiltonian system depends on both θ and r. ∂H∂θ(h::Hamiltonian, θ::AbstractVecOrMat, r::AbstractVecOrMat) = ∂H∂θ(h, θ) -∂H∂r(h::Hamiltonian, θ::AbstractVecOrMat, r::AbstractVecOrMat) = ∂H∂r(h, r) +function ∂H∂r(h::Hamiltonian, θ::AbstractVecOrMat, r::AbstractVecOrMat) + return DualValue(neg_energy(h, θ, r), ∂H∂r(h, r)) +end struct PhasePoint{T<:AbstractVecOrMat{<:AbstractFloat},V<:DualValue} θ::T # Position variables / model parameters. @@ -101,7 +102,7 @@ function Base.similar(z::PhasePoint{<:AbstractVecOrMat{T}}) where {T<:AbstractFl end function phasepoint( - h::Hamiltonian, θ::T, r::T; ℓπ=∂H∂θ(h, θ), ℓκ=DualValue(neg_energy(h, r, θ), ∂H∂r(h, r)) + h::Hamiltonian, θ::T, r::T; ℓπ=∂H∂θ(h, θ), ℓκ=∂H∂r(h, θ, r) ) where {T<:AbstractVecOrMat} return PhasePoint(θ, r, ℓπ, ℓκ) end @@ -110,12 +111,7 @@ end # move the momentum variable to that of the position variable. # This is needed for AHMC to work with CuArrays and other Arrays (without depending on it). function phasepoint( - h::Hamiltonian, - θ::T1, - _r::T2; - r=safe_rsimilar(θ, _r), - ℓπ=∂H∂θ(h, θ), - ℓκ=DualValue(neg_energy(h, r, θ), ∂H∂r(h, r)), + h::Hamiltonian, θ::T1, _r::T2; r=safe_rsimilar(θ, _r), ℓπ=∂H∂θ(h, θ), ℓκ=∂H∂r(h, θ, r) ) where {T1<:AbstractVecOrMat,T2<:AbstractVecOrMat} return PhasePoint(θ, r, ℓπ, ℓκ) end @@ -141,31 +137,31 @@ neg_energy(h::Hamiltonian, θ::AbstractVecOrMat) = h.ℓπ(θ) # GaussianKinetic function neg_energy( - h::Hamiltonian{<:UnitEuclideanMetric,<:GaussianKinetic}, r::T, θ::T + h::Hamiltonian{<:UnitEuclideanMetric,<:GaussianKinetic}, θ::T, r::T ) where {T<:AbstractVector} return -sum(abs2, r) / 2 end function neg_energy( - h::Hamiltonian{<:UnitEuclideanMetric,<:GaussianKinetic}, r::T, θ::T + h::Hamiltonian{<:UnitEuclideanMetric,<:GaussianKinetic}, θ::T, r::T ) where {T<:AbstractMatrix} return -vec(sum(abs2, r; dims=1)) / 2 end function neg_energy( - h::Hamiltonian{<:DiagEuclideanMetric,<:GaussianKinetic}, r::T, θ::T + h::Hamiltonian{<:DiagEuclideanMetric,<:GaussianKinetic}, θ::T, r::T ) where {T<:AbstractVector} return -sum(abs2.(r) .* h.metric.M⁻¹) / 2 end function neg_energy( - h::Hamiltonian{<:DiagEuclideanMetric,<:GaussianKinetic}, r::T, θ::T + h::Hamiltonian{<:DiagEuclideanMetric,<:GaussianKinetic}, θ::T, r::T ) where {T<:AbstractMatrix} return -vec(sum(abs2.(r) .* h.metric.M⁻¹; dims=1)) / 2 end function neg_energy( - h::Hamiltonian{<:DenseEuclideanMetric,<:GaussianKinetic}, r::T, θ::T + h::Hamiltonian{<:DenseEuclideanMetric,<:GaussianKinetic}, θ::T, r::T ) where {T<:AbstractVecOrMat} mul!(h.metric._temp, h.metric.M⁻¹, r) return -dot(r, h.metric._temp) / 2 diff --git a/src/riemannian/hamiltonian.jl b/src/riemannian/hamiltonian.jl index 6f051ffb..15b52b47 100644 --- a/src/riemannian/hamiltonian.jl +++ b/src/riemannian/hamiltonian.jl @@ -72,9 +72,10 @@ function step( end #! Eq (17) of Girolami & Calderhead (2011) θ_full = copy(θ_init) - term_1 = ∂H∂r(h, θ_init, r_half) # unchanged across the loop + term_1 = ∂H∂r(h, θ_init, r_half).gradient # unchanged across the loop for j in 1:(lf.n) - θ_full = θ_init + ϵ / 2 * (term_1 + ∂H∂r(h, θ_full, r_half)) + grad_r = ∂H∂r(h, θ_full, r_half).gradient + θ_full = θ_init + ϵ / 2 * (term_1 + grad_r) # println("θ_full :", θ_full) end #! Eq (18) of Girolami & Calderhead (2011) @@ -103,7 +104,6 @@ function step( return res end -# TODO Make the order of θ and r consistent with neg_energy ∂H∂θ(h::Hamiltonian, θ::AbstractVecOrMat, r::AbstractVecOrMat) = ∂H∂θ(h, θ) ∂H∂r(h::Hamiltonian, θ::AbstractVecOrMat, r::AbstractVecOrMat) = ∂H∂r(h, r) @@ -227,11 +227,7 @@ using LinearAlgebra: logabsdet, tr # QUES Do we want to change everything to position dependent by default? # Add θ to ∂H∂r for DenseRiemannianMetric function phasepoint( - h::Hamiltonian{<:DenseRiemannianMetric}, - θ::T, - r::T; - ℓπ=∂H∂θ(h, θ), - ℓκ=DualValue(neg_energy(h, r, θ), ∂H∂r(h, θ, r)), + h::Hamiltonian{<:DenseRiemannianMetric}, θ::T, r::T; ℓπ=∂H∂θ(h, θ), ℓκ=∂H∂r(h, θ, r) ) where {T<:AbstractVecOrMat} return PhasePoint(θ, r, ℓπ, ℓκ) end @@ -239,7 +235,7 @@ end # Negative kinetic energy #! Eq (13) of Girolami & Calderhead (2011) function neg_energy( - h::Hamiltonian{<:DenseRiemannianMetric}, r::T, θ::T + h::Hamiltonian{<:DenseRiemannianMetric}, θ::T, r::T ) where {T<:AbstractVecOrMat} G = h.metric.map(h.metric.G(θ)) D = size(G, 1) @@ -347,12 +343,6 @@ function ∂H∂r( h::Hamiltonian{<:DenseRiemannianMetric}, θ::AbstractVecOrMat, r::AbstractVecOrMat ) H = h.metric.G(θ) - # if !all(isfinite, H) - # println("θ: ", θ) - # println("H: ", H) - # end G = h.metric.map(H) - # return inv(G) * r - # println("G \ r: ", G \ r) - return G \ r # NOTE it's actually pretty weird that ∂H∂θ returns DualValue but ∂H∂r doesn't + return DualValue(neg_energy(h, θ, r), G \ r) end diff --git a/src/riemannian/integrator.jl b/src/riemannian/integrator.jl index 6ce59476..38452685 100644 --- a/src/riemannian/integrator.jl +++ b/src/riemannian/integrator.jl @@ -74,9 +74,10 @@ function step( end # eq (17) of Girolami & Calderhead (2011) θ_full = θ_init - term_1 = ∂H∂r(h, θ_init, r_half) # unchanged across the loop + term_1 = ∂H∂r(h, θ_init, r_half).gradient # unchanged across the loop for j in 1:(lf.n) - θ_full = θ_init + ϵ / 2 * (term_1 + ∂H∂r(h, θ_full, r_half)) + grad_r = ∂H∂r(h, θ_full, r_half).gradient + θ_full = θ_init + ϵ / 2 * (term_1 + grad_r) end # eq (18) of Girolami & Calderhead (2011) (; value, gradient) = ∂H∂θ(h, θ_full, r_half) diff --git a/test/hamiltonian.jl b/test/hamiltonian.jl index 52a54cc8..781d96d5 100644 --- a/test/hamiltonian.jl +++ b/test/hamiltonian.jl @@ -60,19 +60,19 @@ end r_init = randn(T, D) h = Hamiltonian(UnitEuclideanMetric(T, D), ℓπ, ∂ℓπ∂θ) - @test -AdvancedHMC.neg_energy(h, r_init, θ_init) == sum(abs2, r_init) / 2 + @test -AdvancedHMC.neg_energy(h, θ_init, r_init) == sum(abs2, r_init) / 2 @test AdvancedHMC.∂H∂r(h, r_init) == r_init M⁻¹ = ones(T, D) + abs.(randn(T, D)) h = Hamiltonian(DiagEuclideanMetric(M⁻¹), ℓπ, ∂ℓπ∂θ) - @test -AdvancedHMC.neg_energy(h, r_init, θ_init) ≈ + @test -AdvancedHMC.neg_energy(h, θ_init, r_init) ≈ r_init' * diagm(0 => M⁻¹) * r_init / 2 @test AdvancedHMC.∂H∂r(h, r_init) == M⁻¹ .* r_init m = randn(T, D, D) M⁻¹ = m' * m h = Hamiltonian(DenseEuclideanMetric(M⁻¹), ℓπ, ∂ℓπ∂θ) - @test -AdvancedHMC.neg_energy(h, r_init, θ_init) ≈ r_init' * M⁻¹ * r_init / 2 + @test -AdvancedHMC.neg_energy(h, θ_init, r_init) ≈ r_init' * M⁻¹ * r_init / 2 @test AdvancedHMC.∂H∂r(h, r_init) == M⁻¹ * r_init end end @@ -86,7 +86,7 @@ end r_init = ComponentArray(; a=randn(T, D), b=randn(T, D)) h = Hamiltonian(UnitEuclideanMetric(T, 2 * D), ℓπ, ∂ℓπ∂θ) - @test -AdvancedHMC.neg_energy(h, r_init, θ_init) == sum(abs2, r_init) / 2 + @test -AdvancedHMC.neg_energy(h, θ_init, r_init) == sum(abs2, r_init) / 2 @test AdvancedHMC.∂H∂r(h, r_init) == r_init @test typeof(AdvancedHMC.∂H∂r(h, r_init)) == typeof(r_init) @@ -94,7 +94,7 @@ end a=ones(T, D) + abs.(randn(T, D)), b=ones(T, D) + abs.(randn(T, D)) ) h = Hamiltonian(DiagEuclideanMetric(M⁻¹), ℓπ, ∂ℓπ∂θ) - @test -AdvancedHMC.neg_energy(h, r_init, θ_init) ≈ + @test -AdvancedHMC.neg_energy(h, θ_init, r_init) ≈ r_init' * diagm(0 => M⁻¹) * r_init / 2 @test AdvancedHMC.∂H∂r(h, r_init) == M⁻¹ .* r_init @test typeof(AdvancedHMC.∂H∂r(h, r_init)) == typeof(r_init) @@ -103,7 +103,7 @@ end ax = getaxes(r_init)[1] M⁻¹ = ComponentArray(m' * m, ax, ax) h = Hamiltonian(DenseEuclideanMetric(M⁻¹), ℓπ, ∂ℓπ∂θ) - @test -AdvancedHMC.neg_energy(h, r_init, θ_init) ≈ r_init' * M⁻¹ * r_init / 2 + @test -AdvancedHMC.neg_energy(h, θ_init, r_init) ≈ r_init' * M⁻¹ * r_init / 2 @test all(AdvancedHMC.∂H∂r(h, r_init) .== M⁻¹ * r_init) @test typeof(AdvancedHMC.∂H∂r(h, r_init)) == typeof(r_init) end