Skip to content

Make ∂H∂r consistent with ∂H∂θ #458

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# AdvancedHMC Changelog

## 0.9.0

- The parameter ordering for `neg_energy` has changed to improve consistency. Use `neg_energy(h, θ, r)` instead of `neg_energy(h, r, θ)`, to compute the negative energy of a Hamiltonian `h` for parameter `θ` and momentum `r`, This is now consistent with the rest of the AHMC interface, like `phasepoint(h, θ, r)`.

## 0.8.0

- To make an MCMC transtion from phasepoint `z` using trajectory `τ`(or HMCKernel `κ`) under Hamiltonian `h`, use `transition(h, τ, z)` or `transition(rng, h, τ, z)`(if using HMCKernel, use `transition(h, κ, z)` or `transition(rng, h, κ, z)`).
Expand Down
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name = "AdvancedHMC"
uuid = "0bf59076-c3b1-5ca4-86bd-e02cd72cde3d"
version = "0.8.0"
version = "0.9.0"

[deps]
AbstractMCMC = "80f14c24-f653-4e6a-9b94-39d6b0f70001"
Expand Down
2 changes: 1 addition & 1 deletion docs/Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,6 @@ Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
DocumenterCitations = "daee34ce-89f3-4625-b898-19384cb65244"

[compat]
AdvancedHMC = "0.8"
AdvancedHMC = "0.9"
Documenter = "1"
DocumenterCitations = "1"
24 changes: 10 additions & 14 deletions src/hamiltonian.jl
Original file line number Diff line number Diff line change
Expand Up @@ -67,11 +67,12 @@ function ∂H∂r(h::Hamiltonian{<:DenseEuclideanMetric,<:GaussianKinetic}, r::A
return M⁻¹ * r
end

# TODO (kai) make the order of θ and r consistent with neg_energy
# TODO (kai) add stricter types to block hamiltonian.jl#L37 from working on unknown metric/kinetic
# The gradient of a position-dependent Hamiltonian system depends on both θ and r.
∂H∂θ(h::Hamiltonian, θ::AbstractVecOrMat, r::AbstractVecOrMat) = ∂H∂θ(h, θ)
∂H∂r(h::Hamiltonian, θ::AbstractVecOrMat, r::AbstractVecOrMat) = ∂H∂r(h, r)
function ∂H∂r(h::Hamiltonian, θ::AbstractVecOrMat, r::AbstractVecOrMat)
Copy link
Member

@yebai yebai Jun 28, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need to remove these too.

# TODO Make the order of θ and r consistent with neg_energy
∂H∂θ(h::Hamiltonian, θ::AbstractVecOrMat, r::AbstractVecOrMat) = ∂H∂θ(h, θ)
∂H∂r(h::Hamiltonian, θ::AbstractVecOrMat, r::AbstractVecOrMat) = ∂H∂r(h, r)

return DualValue(neg_energy(h, θ, r), ∂H∂r(h, r))
end

struct PhasePoint{T<:AbstractVecOrMat{<:AbstractFloat},V<:DualValue}
θ::T # Position variables / model parameters.
Expand Down Expand Up @@ -101,7 +102,7 @@ function Base.similar(z::PhasePoint{<:AbstractVecOrMat{T}}) where {T<:AbstractFl
end

function phasepoint(
h::Hamiltonian, θ::T, r::T; ℓπ=∂H∂θ(h, θ), ℓκ=DualValue(neg_energy(h, r, θ), ∂H∂r(h, r))
h::Hamiltonian, θ::T, r::T; ℓπ=∂H∂θ(h, θ), ℓκ=∂H∂r(h, θ, r)
) where {T<:AbstractVecOrMat}
return PhasePoint(θ, r, ℓπ, ℓκ)
end
Expand All @@ -110,12 +111,7 @@ end
# move the momentum variable to that of the position variable.
# This is needed for AHMC to work with CuArrays and other Arrays (without depending on it).
function phasepoint(
h::Hamiltonian,
θ::T1,
_r::T2;
r=safe_rsimilar(θ, _r),
ℓπ=∂H∂θ(h, θ),
ℓκ=DualValue(neg_energy(h, r, θ), ∂H∂r(h, r)),
h::Hamiltonian, θ::T1, _r::T2; r=safe_rsimilar(θ, _r), ℓπ=∂H∂θ(h, θ), ℓκ=∂H∂r(h, θ, r)
) where {T1<:AbstractVecOrMat,T2<:AbstractVecOrMat}
return PhasePoint(θ, r, ℓπ, ℓκ)
end
Expand All @@ -141,31 +137,31 @@ neg_energy(h::Hamiltonian, θ::AbstractVecOrMat) = h.ℓπ(θ)
# GaussianKinetic

function neg_energy(
h::Hamiltonian{<:UnitEuclideanMetric,<:GaussianKinetic}, r::T, θ::T
h::Hamiltonian{<:UnitEuclideanMetric,<:GaussianKinetic}, θ::T, r::T
) where {T<:AbstractVector}
return -sum(abs2, r) / 2
end

function neg_energy(
h::Hamiltonian{<:UnitEuclideanMetric,<:GaussianKinetic}, r::T, θ::T
h::Hamiltonian{<:UnitEuclideanMetric,<:GaussianKinetic}, θ::T, r::T
) where {T<:AbstractMatrix}
return -vec(sum(abs2, r; dims=1)) / 2
end

function neg_energy(
h::Hamiltonian{<:DiagEuclideanMetric,<:GaussianKinetic}, r::T, θ::T
h::Hamiltonian{<:DiagEuclideanMetric,<:GaussianKinetic}, θ::T, r::T
) where {T<:AbstractVector}
return -sum(abs2.(r) .* h.metric.M⁻¹) / 2
end

function neg_energy(
h::Hamiltonian{<:DiagEuclideanMetric,<:GaussianKinetic}, r::T, θ::T
h::Hamiltonian{<:DiagEuclideanMetric,<:GaussianKinetic}, θ::T, r::T
) where {T<:AbstractMatrix}
return -vec(sum(abs2.(r) .* h.metric.M⁻¹; dims=1)) / 2
end

function neg_energy(
h::Hamiltonian{<:DenseEuclideanMetric,<:GaussianKinetic}, r::T, θ::T
h::Hamiltonian{<:DenseEuclideanMetric,<:GaussianKinetic}, θ::T, r::T
) where {T<:AbstractVecOrMat}
mul!(h.metric._temp, h.metric.M⁻¹, r)
return -dot(r, h.metric._temp) / 2
Expand Down
22 changes: 6 additions & 16 deletions src/riemannian/hamiltonian.jl
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,10 @@
end
#! Eq (17) of Girolami & Calderhead (2011)
θ_full = copy(θ_init)
term_1 = ∂H∂r(h, θ_init, r_half) # unchanged across the loop
term_1 = ∂H∂r(h, θ_init, r_half).gradient # unchanged across the loop

Check warning on line 75 in src/riemannian/hamiltonian.jl

View check run for this annotation

Codecov / codecov/patch

src/riemannian/hamiltonian.jl#L75

Added line #L75 was not covered by tests
Copy link
Member

@yebai yebai Jun 28, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This computes neg_energy but doesn't use it, which has performance implications. Maybe use a different name for functions that returns DualValue, eg: neg_energy_and_∂H∂r.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ohh, I suddenly realize the riemannian manifold HMC is not included in AdvancedHMC yet, this file is not included in the package, see

include("riemannian/integrator.jl")

I think we might start with #439, which is ready to get in now.

for j in 1:(lf.n)
θ_full = θ_init + ϵ / 2 * (term_1 + ∂H∂r(h, θ_full, r_half))
grad_r = ∂H∂r(h, θ_full, r_half).gradient
θ_full = θ_init + ϵ / 2 * (term_1 + grad_r)

Check warning on line 78 in src/riemannian/hamiltonian.jl

View check run for this annotation

Codecov / codecov/patch

src/riemannian/hamiltonian.jl#L77-L78

Added lines #L77 - L78 were not covered by tests
# println("θ_full :", θ_full)
end
#! Eq (18) of Girolami & Calderhead (2011)
Expand Down Expand Up @@ -103,7 +104,6 @@
return res
end

# TODO Make the order of θ and r consistent with neg_energy
∂H∂θ(h::Hamiltonian, θ::AbstractVecOrMat, r::AbstractVecOrMat) = ∂H∂θ(h, θ)
∂H∂r(h::Hamiltonian, θ::AbstractVecOrMat, r::AbstractVecOrMat) = ∂H∂r(h, r)

Expand Down Expand Up @@ -227,19 +227,15 @@
# QUES Do we want to change everything to position dependent by default?
# Add θ to ∂H∂r for DenseRiemannianMetric
function phasepoint(
h::Hamiltonian{<:DenseRiemannianMetric},
θ::T,
r::T;
ℓπ=∂H∂θ(h, θ),
ℓκ=DualValue(neg_energy(h, r, θ), ∂H∂r(h, θ, r)),
h::Hamiltonian{<:DenseRiemannianMetric}, θ::T, r::T; ℓπ=∂H∂θ(h, θ), ℓκ=∂H∂r(h, θ, r)
) where {T<:AbstractVecOrMat}
return PhasePoint(θ, r, ℓπ, ℓκ)
end

# Negative kinetic energy
#! Eq (13) of Girolami & Calderhead (2011)
function neg_energy(
h::Hamiltonian{<:DenseRiemannianMetric}, r::T, θ::T
h::Hamiltonian{<:DenseRiemannianMetric}, θ::T, r::T
) where {T<:AbstractVecOrMat}
G = h.metric.map(h.metric.G(θ))
D = size(G, 1)
Expand Down Expand Up @@ -347,12 +343,6 @@
h::Hamiltonian{<:DenseRiemannianMetric}, θ::AbstractVecOrMat, r::AbstractVecOrMat
)
H = h.metric.G(θ)
# if !all(isfinite, H)
# println("θ: ", θ)
# println("H: ", H)
# end
G = h.metric.map(H)
# return inv(G) * r
# println("G \ r: ", G \ r)
return G \ r # NOTE it's actually pretty weird that ∂H∂θ returns DualValue but ∂H∂r doesn't
return DualValue(neg_energy(h, θ, r), G \ r)
end
5 changes: 3 additions & 2 deletions src/riemannian/integrator.jl
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,10 @@
end
# eq (17) of Girolami & Calderhead (2011)
θ_full = θ_init
term_1 = ∂H∂r(h, θ_init, r_half) # unchanged across the loop
term_1 = ∂H∂r(h, θ_init, r_half).gradient # unchanged across the loop

Check warning on line 77 in src/riemannian/integrator.jl

View check run for this annotation

Codecov / codecov/patch

src/riemannian/integrator.jl#L77

Added line #L77 was not covered by tests
for j in 1:(lf.n)
θ_full = θ_init + ϵ / 2 * (term_1 + ∂H∂r(h, θ_full, r_half))
grad_r = ∂H∂r(h, θ_full, r_half).gradient
θ_full = θ_init + ϵ / 2 * (term_1 + grad_r)

Check warning on line 80 in src/riemannian/integrator.jl

View check run for this annotation

Codecov / codecov/patch

src/riemannian/integrator.jl#L79-L80

Added lines #L79 - L80 were not covered by tests
end
# eq (18) of Girolami & Calderhead (2011)
(; value, gradient) = ∂H∂θ(h, θ_full, r_half)
Expand Down
12 changes: 6 additions & 6 deletions test/hamiltonian.jl
Original file line number Diff line number Diff line change
Expand Up @@ -60,19 +60,19 @@ end
r_init = randn(T, D)

h = Hamiltonian(UnitEuclideanMetric(T, D), ℓπ, ∂ℓπ∂θ)
@test -AdvancedHMC.neg_energy(h, r_init, θ_init) == sum(abs2, r_init) / 2
@test -AdvancedHMC.neg_energy(h, θ_init, r_init) == sum(abs2, r_init) / 2
@test AdvancedHMC.∂H∂r(h, r_init) == r_init

M⁻¹ = ones(T, D) + abs.(randn(T, D))
h = Hamiltonian(DiagEuclideanMetric(M⁻¹), ℓπ, ∂ℓπ∂θ)
@test -AdvancedHMC.neg_energy(h, r_init, θ_init) ≈
@test -AdvancedHMC.neg_energy(h, θ_init, r_init) ≈
r_init' * diagm(0 => M⁻¹) * r_init / 2
@test AdvancedHMC.∂H∂r(h, r_init) == M⁻¹ .* r_init

m = randn(T, D, D)
M⁻¹ = m' * m
h = Hamiltonian(DenseEuclideanMetric(M⁻¹), ℓπ, ∂ℓπ∂θ)
@test -AdvancedHMC.neg_energy(h, r_init, θ_init) ≈ r_init' * M⁻¹ * r_init / 2
@test -AdvancedHMC.neg_energy(h, θ_init, r_init) ≈ r_init' * M⁻¹ * r_init / 2
@test AdvancedHMC.∂H∂r(h, r_init) == M⁻¹ * r_init
end
end
Expand All @@ -86,15 +86,15 @@ end
r_init = ComponentArray(; a=randn(T, D), b=randn(T, D))

h = Hamiltonian(UnitEuclideanMetric(T, 2 * D), ℓπ, ∂ℓπ∂θ)
@test -AdvancedHMC.neg_energy(h, r_init, θ_init) == sum(abs2, r_init) / 2
@test -AdvancedHMC.neg_energy(h, θ_init, r_init) == sum(abs2, r_init) / 2
@test AdvancedHMC.∂H∂r(h, r_init) == r_init
@test typeof(AdvancedHMC.∂H∂r(h, r_init)) == typeof(r_init)

M⁻¹ = ComponentArray(;
a=ones(T, D) + abs.(randn(T, D)), b=ones(T, D) + abs.(randn(T, D))
)
h = Hamiltonian(DiagEuclideanMetric(M⁻¹), ℓπ, ∂ℓπ∂θ)
@test -AdvancedHMC.neg_energy(h, r_init, θ_init) ≈
@test -AdvancedHMC.neg_energy(h, θ_init, r_init) ≈
r_init' * diagm(0 => M⁻¹) * r_init / 2
@test AdvancedHMC.∂H∂r(h, r_init) == M⁻¹ .* r_init
@test typeof(AdvancedHMC.∂H∂r(h, r_init)) == typeof(r_init)
Expand All @@ -103,7 +103,7 @@ end
ax = getaxes(r_init)[1]
M⁻¹ = ComponentArray(m' * m, ax, ax)
h = Hamiltonian(DenseEuclideanMetric(M⁻¹), ℓπ, ∂ℓπ∂θ)
@test -AdvancedHMC.neg_energy(h, r_init, θ_init) ≈ r_init' * M⁻¹ * r_init / 2
@test -AdvancedHMC.neg_energy(h, θ_init, r_init) ≈ r_init' * M⁻¹ * r_init / 2
@test all(AdvancedHMC.∂H∂r(h, r_init) .== M⁻¹ * r_init)
@test typeof(AdvancedHMC.∂H∂r(h, r_init)) == typeof(r_init)
end
Expand Down
Loading