diff --git a/Project.toml b/Project.toml
index ebe8f4ec..1c6fc038 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "AbstractGPs"
 uuid = "99985d1d-32ba-4be9-9821-2ec096f28918"
 authors = ["JuliaGaussianProcesses Team"]
-version = "0.5.16"
+version = "0.5.17"
 
 [deps]
 ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
diff --git a/src/AbstractGPs.jl b/src/AbstractGPs.jl
index efa6735a..b0cd6885 100644
--- a/src/AbstractGPs.jl
+++ b/src/AbstractGPs.jl
@@ -27,6 +27,7 @@ export rand!,
     mean_vector,
     marginals,
     logpdf,
+    approx_log_evidence,
     elbo,
     dtc,
     posterior,
diff --git a/src/abstract_gp.jl b/src/abstract_gp.jl
index 9163efc6..2fb071b3 100644
--- a/src/abstract_gp.jl
+++ b/src/abstract_gp.jl
@@ -85,3 +85,30 @@ for (m, f) in [
         )
     end
 end
+
+"""
+    approx_log_evidence(approx, lfx::LatentFiniteGP, ys)
+
+Compute an approximation to the log of the marginal likelihood (also known as
+"evidence") under the given `approx`imation to the posterior. The return value
+of `approx_log_evidence` can be used to optimise the hyperparameters of `lfx`.
+"""
+function approx_log_evidence end
+
+"""
+    posterior(fx::FiniteGP, y::AbstractVector{<:Real})
+    posterior(approx, fx::FiniteGP, y::AbstractVector{<:Real})
+    posterior(approx, lfx::LatentFiniteGP, y::AbstractVector)
+
+Construct the posterior distribution over the latent Gaussian process (`fx.f`
+or `lfx.fx.f`), given the observations `y` corresponding to the process's
+finite projection (`fx` or `lfx`).
+
+In the two-argument form, this describes exact GP regression with `y` observed
+under a Gaussian likelihood, and returns a `PosteriorGP`.
+
+In the three-argument form, the first argument specifies the approximation to
+be used (e.g. `VFE`, or approximations defined in other packages such as
+ApproximateGPs.jl), and returns an `ApproxPosteriorGP`.
+"""
+function posterior end
diff --git a/src/deprecations.jl b/src/deprecations.jl
index 459fe675..49975b7e 100644
--- a/src/deprecations.jl
+++ b/src/deprecations.jl
@@ -5,3 +5,6 @@
 @deprecate sampleplot!(plt::RecipesBase.AbstractPlot, gp::FiniteGP, n::Int; kwargs...) sampleplot!(
     plt, gp; samples=n, kwargs...
 )
+
+@deprecate elbo(dtc::DTC, fx, y) approx_log_evidence(dtc, fx, y)
+@deprecate dtc(vfe::Union{VFE,DTC}, fx, y) approx_log_evidence(vfe, fx, y)
diff --git a/src/exact_gpr_posterior.jl b/src/exact_gpr_posterior.jl
index fd0d265b..5cbe98f5 100644
--- a/src/exact_gpr_posterior.jl
+++ b/src/exact_gpr_posterior.jl
@@ -3,10 +3,18 @@ struct PosteriorGP{Tprior,Tdata} <: AbstractGP
     data::Tdata
 end
 
+struct ExactInference end
+
+posterior(::ExactInference, fx::FiniteGP, y::AbstractVector{<:Real}) = posterior(fx, y)
+
+function approx_log_evidence(::ExactInference, fx::FiniteGP, y::AbstractVector{<:Real})
+    return logpdf(fx, y)
+end
+
 """
     posterior(fx::FiniteGP, y::AbstractVector{<:Real})
 
-Construct the posterior distribution over `fx.f` given observations `y` at `x` made under
+Construct the posterior distribution over `fx.f` given observations `y` at `fx.x` made under
 noise `fx.Σy`. This is another `AbstractGP` object. See chapter 2 of [1] for a recap on
 exact inference in GPs. This posterior process has mean function
 ```julia
diff --git a/src/sparse_approximations.jl b/src/sparse_approximations.jl
index 5393a35f..99b25f35 100644
--- a/src/sparse_approximations.jl
+++ b/src/sparse_approximations.jl
@@ -13,7 +13,14 @@ struct VFE{Tfz<:FiniteGP}
     fz::Tfz
 end
 
-const DTC = VFE
+"""
+    DTC(fz::FiniteGP)
+
+Similar to `VFE`, but uses a different objective for `approx_log_evidence`.
+""" +struct DTC{Tfz<:FiniteGP} + fz::Tfz +end struct ApproxPosteriorGP{Tapprox,Tprior,Tdata} <: AbstractGP approx::Tapprox @@ -48,7 +55,7 @@ true processes". In: Proceedings of the Twelfth International Conference on Artificial Intelligence and Statistics. 2009. """ -function posterior(vfe::VFE, fx::FiniteGP, y::AbstractVector{<:Real}) +function posterior(vfe::Union{VFE,DTC}, fx::FiniteGP, y::AbstractVector{<:Real}) @assert vfe.fz.f === fx.f U_y = _cholesky(_symmetric(fx.Σy)).U @@ -69,7 +76,7 @@ end """ function update_posterior( - f_post_approx::ApproxPosteriorGP{<:VFE}, + f_post_approx::ApproxPosteriorGP{<:Union{VFE,DTC}}, fx::FiniteGP, y::AbstractVector{<:Real} ) @@ -78,7 +85,9 @@ Update the `ApproxPosteriorGP` given a new set of observations. Here, we retain set of pseudo-points. """ function update_posterior( - f_post_approx::ApproxPosteriorGP{<:VFE}, fx::FiniteGP, y::AbstractVector{<:Real} + f_post_approx::ApproxPosteriorGP{<:Union{VFE,DTC}}, + fx::FiniteGP, + y::AbstractVector{<:Real}, ) @assert f_post_approx.prior === fx.f @@ -111,14 +120,14 @@ end """ function update_posterior( - f_post_approx::ApproxPosteriorGP{<:VFE}, + f_post_approx::ApproxPosteriorGP{<:Union{VFE,DTC}}, z::FiniteGP, ) Update the `ApproxPosteriorGP` given a new set of pseudo-points to append to the existing set of pseudo-points. """ -function update_posterior(f_post_approx::ApproxPosteriorGP{<:VFE}, fz::FiniteGP) +function update_posterior(f_post_approx::ApproxPosteriorGP{<:Union{VFE,DTC}}, fz::FiniteGP) @assert f_post_approx.prior === fz.f z_old = inducing_points(f_post_approx) @@ -161,48 +170,56 @@ function update_posterior(f_post_approx::ApproxPosteriorGP{<:VFE}, fz::FiniteGP) x=f_post_approx.data.x, Σy=f_post_approx.data.Σy, ) - return ApproxPosteriorGP(VFE(fz_new), f_post_approx.prior, cache) + return ApproxPosteriorGP( + _update_approx(f_post_approx.approx, fz_new), f_post_approx.prior, cache + ) end +_update_approx(vfe::VFE, fz_new::FiniteGP) = VFE(fz_new) +_update_approx(dtc::DTC, fz_new::FiniteGP) = DTC(fz_new) + # AbstractGP interface implementation. 
-function Statistics.mean(f::ApproxPosteriorGP{<:VFE}, x::AbstractVector) +function Statistics.mean(f::ApproxPosteriorGP{<:Union{VFE,DTC}}, x::AbstractVector) return mean(f.prior, x) + cov(f.prior, x, inducing_points(f)) * f.data.α end -function Statistics.cov(f::ApproxPosteriorGP{<:VFE}, x::AbstractVector) +function Statistics.cov(f::ApproxPosteriorGP{<:Union{VFE,DTC}}, x::AbstractVector) A = f.data.U' \ cov(f.prior, inducing_points(f), x) return cov(f.prior, x) - At_A(A) + Xt_invA_X(f.data.Λ_ε, A) end -function Statistics.var(f::ApproxPosteriorGP{<:VFE}, x::AbstractVector) +function Statistics.var(f::ApproxPosteriorGP{<:Union{VFE,DTC}}, x::AbstractVector) A = f.data.U' \ cov(f.prior, inducing_points(f), x) return var(f.prior, x) - diag_At_A(A) + diag_Xt_invA_X(f.data.Λ_ε, A) end -function Statistics.cov(f::ApproxPosteriorGP{<:VFE}, x::AbstractVector, y::AbstractVector) +function Statistics.cov( + f::ApproxPosteriorGP{<:Union{VFE,DTC}}, x::AbstractVector, y::AbstractVector +) A_zx = f.data.U' \ cov(f.prior, inducing_points(f), x) A_zy = f.data.U' \ cov(f.prior, inducing_points(f), y) return cov(f.prior, x, y) - A_zx'A_zy + Xt_invA_Y(A_zx, f.data.Λ_ε, A_zy) end -function StatsBase.mean_and_cov(f::ApproxPosteriorGP{<:VFE}, x::AbstractVector) +function StatsBase.mean_and_cov(f::ApproxPosteriorGP{<:Union{VFE,DTC}}, x::AbstractVector) A = f.data.U' \ cov(f.prior, inducing_points(f), x) m_post = mean(f.prior, x) + A' * f.data.m_ε C_post = cov(f.prior, x) - At_A(A) + Xt_invA_X(f.data.Λ_ε, A) return m_post, C_post end -function StatsBase.mean_and_var(f::ApproxPosteriorGP{<:VFE}, x::AbstractVector) +function StatsBase.mean_and_var(f::ApproxPosteriorGP{<:Union{VFE,DTC}}, x::AbstractVector) A = f.data.U' \ cov(f.prior, inducing_points(f), x) m_post = mean(f.prior, x) + A' * f.data.m_ε c_post = var(f.prior, x) - diag_At_A(A) + diag_Xt_invA_X(f.data.Λ_ε, A) return m_post, c_post end -inducing_points(f::ApproxPosteriorGP{<:VFE}) = f.approx.fz.x +inducing_points(f::ApproxPosteriorGP{<:Union{VFE,DTC}}) = f.approx.fz.x """ + approx_log_evidence(vfe::VFE, fx::FiniteGP, y::AbstractVector{<:Real}) elbo(vfe::VFE, fx::FiniteGP, y::AbstractVector{<:Real}) The Titsias Evidence Lower BOund (ELBO) [1]. `y` are observations of `fx`, and `v.z` @@ -228,14 +245,16 @@ true processes". In: Proceedings of the Twelfth International Conference on Artificial Intelligence and Statistics. 2009. """ -function elbo(vfe::VFE, fx::FiniteGP, y::AbstractVector{<:Real}) +function approx_log_evidence(vfe::VFE, fx::FiniteGP, y::AbstractVector{<:Real}) @assert vfe.fz.f === fx.f - _dtc, A = _compute_intermediates(fx, y, vfe.fz) - return _dtc - (tr_Cf_invΣy(fx, fx.Σy) - sum(abs2, A)) / 2 + dtc_objective, A = _compute_intermediates(fx, y, vfe.fz) + return dtc_objective - (tr_Cf_invΣy(fx, fx.Σy) - sum(abs2, A)) / 2 end +elbo(vfe::VFE, fx, y) = approx_log_evidence(vfe, fx, y) + """ - dtc(v::VFE, fx::FiniteGP, y::AbstractVector{<:Real}) + approx_log_evidence(dtc::DTC, fx::FiniteGP, y::AbstractVector{<:Real}) The Deterministic Training Conditional (DTC) [1]. `y` are observations of `fx`, and `v.z` are inducing points. @@ -248,11 +267,11 @@ julia> x = randn(1000); julia> z = range(-5.0, 5.0; length=256); -julia> v = VFE(f(z)); +julia> d = DTC(f(z)); julia> y = rand(f(x, 0.1)); -julia> isapprox(dtc(v, f(x, 0.1), y), logpdf(f(x, 0.1), y); atol=1e-6, rtol=1e-6) +julia> isapprox(approx_log_evidence(d, f(x, 0.1), y), logpdf(f(x, 0.1), y); atol=1e-6, rtol=1e-6) true ``` @@ -260,13 +279,13 @@ true Sparse Gaussian Process Regression". 
In: Proceedings of the Ninth International Workshop on Artificial Intelligence and Statistics. 2003 """ -function dtc(vfe::VFE, fx::FiniteGP, y::AbstractVector{<:Real}) - @assert vfe.fz.f === fx.f - _dtc, _ = _compute_intermediates(fx, y, vfe.fz) - return _dtc +function approx_log_evidence(dtc::DTC, fx::FiniteGP, y::AbstractVector{<:Real}) + @assert dtc.fz.f === fx.f + dtc_objective, _ = _compute_intermediates(fx, y, dtc.fz) + return dtc_objective end -# Factor out computations common to the `elbo` and `dtc`. +# Factor out computations of `approx_log_evidence` common to `VFE` and `DTC` function _compute_intermediates(fx::FiniteGP, y::AbstractVector{<:Real}, fz::FiniteGP) length(fx) == length(y) || throw( DimensionMismatch( diff --git a/test/sparse_approximations.jl b/test/sparse_approximations.jl index 3187e8a5..375ee2a7 100644 --- a/test/sparse_approximations.jl +++ b/test/sparse_approximations.jl @@ -1,133 +1,136 @@ -@testset "approx_posterior_gp" begin - rng = MersenneTwister(123456) - N_cond = 3 - N_a = 5 - N_b = 6 - - # Specify prior. - f = GP(sin, Matern32Kernel()) - - # Sample from prior. - x = collect(range(-1.0, 1.0; length=N_cond)) - fx = f(x, 1e-15) - y = rand(rng, fx) - - # Construct posterior. - f_post = posterior(fx, y) - - # Construct optimal approximate posterior. - f_approx_post = posterior(VFE(f(x, 1e-12)), fx, y) - - # Verify that approximate posterior ≈ posterior at various inputs. - x_test = randn(rng, 100) - @test mean(f_post, x_test) ≈ mean(f_approx_post, x_test) - @test cov(f_post, x_test) ≈ cov(f_approx_post, x_test) - - # Verify that AbstractGP interface is implemented fully and consistently. - a = collect(range(-1.0, 1.0; length=N_a)) - b = randn(rng, N_b) - TestUtils.test_internal_abstractgps_interface(rng, f_approx_post, a, b) - - @testset "update_posterior (new observation)" begin - rng = MersenneTwister(1) - X = rand(rng, 10) - y = rand(rng, 10) - Z = rand(rng, 4) - - f = GP(SqExponentialKernel()) - - # online learning - p_fx1 = posterior(VFE(f(Z)), f(X[1:7], 0.1), y[1:7]) - u_p_fx1 = update_posterior(p_fx1, f(X[8:10], 0.1), y[8:10]) - - # batch learning - p_fx2 = posterior(VFE(f(Z)), f(X, 0.1), y) - - @test inducing_points(u_p_fx1) ≈ inducing_points(p_fx2) atol = 1e-5 - @test u_p_fx1.data.m_ε ≈ p_fx2.data.m_ε atol = 1e-5 - @test u_p_fx1.data.Λ_ε.U ≈ p_fx2.data.Λ_ε.U atol = 1e-5 - @test u_p_fx1.data.U ≈ p_fx2.data.U atol = 1e-5 - @test u_p_fx1.data.α ≈ p_fx2.data.α atol = 1e-5 - @test u_p_fx1.data.b_y ≈ p_fx2.data.b_y atol = 1e-5 - @test u_p_fx1.data.B_εf ≈ p_fx2.data.B_εf atol = 1e-5 - @test u_p_fx1.data.x ≈ p_fx2.data.x atol = 1e-5 - @test u_p_fx1.data.Σy ≈ p_fx2.data.Σy atol = 1e-5 - end - - @testset "update_posterior (new pseudo-points)" begin - rng = MersenneTwister(1) - X = rand(rng, 10) - y = rand(rng, 10) - Z1 = rand(rng, 4) - Z2 = rand(rng, 3) - Z = vcat(Z1, Z2) - - f = GP(SqExponentialKernel()) - - # Sequentially adding pseudo points - p_fx1 = posterior(VFE(f(Z1)), f(X, 0.1), y) - u_p_fx1 = update_posterior(p_fx1, f(Z2)) - - # Adding all pseudo-points at once - p_fx2 = posterior(VFE(f(Z)), f(X, 0.1), y) - - @test inducing_points(u_p_fx1) ≈ inducing_points(p_fx2) atol = 1e-5 - @test u_p_fx1.data.m_ε ≈ p_fx2.data.m_ε atol = 1e-5 - @test u_p_fx1.data.Λ_ε.U ≈ p_fx2.data.Λ_ε.U atol = 1e-5 - @test u_p_fx1.data.U ≈ p_fx2.data.U atol = 1e-5 - @test u_p_fx1.data.α ≈ p_fx2.data.α atol = 1e-2 rtol = 1e-2 - @test u_p_fx1.data.b_y ≈ p_fx2.data.b_y atol = 1e-5 - @test u_p_fx1.data.B_εf ≈ p_fx2.data.B_εf atol = 1e-5 - @test u_p_fx1.data.x ≈ p_fx2.data.x atol = 
1e-5 - @test u_p_fx1.data.Σy ≈ p_fx2.data.Σy atol = 1e-5 - end - - @testset "elbo / dtc" begin - x = collect(Float64, range(-1.0, 1.0; length=N_cond)) - f = GP(SqExponentialKernel()) - fx = f(x, 0.1) +@testset "sparse_approximations" begin + @testset "approx_posterior_gp[$(ApproxType)]" for ApproxType in [VFE, DTC] + rng = MersenneTwister(123456) + N_cond = 3 + N_a = 5 + N_b = 6 + + # Specify prior. + f = GP(sin, Matern32Kernel()) + + # Sample from prior. + x = collect(range(-1.0, 1.0; length=N_cond)) + fx = f(x, 1e-15) y = rand(rng, fx) - # Ensure that the elbo is close to the logpdf when appropriate. - @test elbo(VFE(f(x)), fx, y) isa Real - @test elbo(VFE(f(x)), fx, y) ≈ logpdf(fx, y) - @test elbo(VFE(f(x .+ randn(rng, N_cond))), fx, y) < logpdf(fx, y) - - # Ensure that the dtc is close to the logpdf when appropriate. - @test dtc(VFE(f(x)), fx, y) isa Real - @test dtc(VFE(f(x)), fx, y) ≈ logpdf(fx, y) + # Construct posterior. + f_post = posterior(fx, y) + + # Construct optimal approximate posterior. + f_approx_post = posterior(ApproxType(f(x, 1e-12)), fx, y) + + # Verify that approximate posterior ≈ posterior at various inputs. + x_test = randn(rng, 100) + @test mean(f_post, x_test) ≈ mean(f_approx_post, x_test) + @test cov(f_post, x_test) ≈ cov(f_approx_post, x_test) + + # Verify that AbstractGP interface is implemented fully and consistently. + a = collect(range(-1.0, 1.0; length=N_a)) + b = randn(rng, N_b) + TestUtils.test_internal_abstractgps_interface(rng, f_approx_post, a, b) + + @testset "update_posterior (new observation)" begin + rng = MersenneTwister(1) + X = rand(rng, 10) + y = rand(rng, 10) + Z = rand(rng, 4) + + f = GP(SqExponentialKernel()) + + # online learning + p_fx1 = posterior(ApproxType(f(Z)), f(X[1:7], 0.1), y[1:7]) + u_p_fx1 = update_posterior(p_fx1, f(X[8:10], 0.1), y[8:10]) + + # batch learning + p_fx2 = posterior(ApproxType(f(Z)), f(X, 0.1), y) + + @test inducing_points(u_p_fx1) ≈ inducing_points(p_fx2) atol = 1e-5 + @test u_p_fx1.data.m_ε ≈ p_fx2.data.m_ε atol = 1e-5 + @test u_p_fx1.data.Λ_ε.U ≈ p_fx2.data.Λ_ε.U atol = 1e-5 + @test u_p_fx1.data.U ≈ p_fx2.data.U atol = 1e-5 + @test u_p_fx1.data.α ≈ p_fx2.data.α atol = 1e-5 + @test u_p_fx1.data.b_y ≈ p_fx2.data.b_y atol = 1e-5 + @test u_p_fx1.data.B_εf ≈ p_fx2.data.B_εf atol = 1e-5 + @test u_p_fx1.data.x ≈ p_fx2.data.x atol = 1e-5 + @test u_p_fx1.data.Σy ≈ p_fx2.data.Σy atol = 1e-5 + end + + @testset "update_posterior (new pseudo-points)" begin + rng = MersenneTwister(1) + X = rand(rng, 10) + y = rand(rng, 10) + Z1 = rand(rng, 4) + Z2 = rand(rng, 3) + Z = vcat(Z1, Z2) + + f = GP(SqExponentialKernel()) + + # Sequentially adding pseudo points + p_fx1 = posterior(ApproxType(f(Z1)), f(X, 0.1), y) + u_p_fx1 = update_posterior(p_fx1, f(Z2)) + + # Adding all pseudo-points at once + p_fx2 = posterior(ApproxType(f(Z)), f(X, 0.1), y) + + @test inducing_points(u_p_fx1) ≈ inducing_points(p_fx2) atol = 1e-5 + @test u_p_fx1.data.m_ε ≈ p_fx2.data.m_ε atol = 1e-5 + @test u_p_fx1.data.Λ_ε.U ≈ p_fx2.data.Λ_ε.U atol = 1e-5 + @test u_p_fx1.data.U ≈ p_fx2.data.U atol = 1e-5 + @test u_p_fx1.data.α ≈ p_fx2.data.α atol = 1e-2 rtol = 1e-2 + @test u_p_fx1.data.b_y ≈ p_fx2.data.b_y atol = 1e-5 + @test u_p_fx1.data.B_εf ≈ p_fx2.data.B_εf atol = 1e-5 + @test u_p_fx1.data.x ≈ p_fx2.data.x atol = 1e-5 + @test u_p_fx1.data.Σy ≈ p_fx2.data.Σy atol = 1e-5 + end + + @testset "approx_log_evidence" begin + x = collect(Float64, range(-1.0, 1.0; length=N_cond)) + f = GP(SqExponentialKernel()) + fx = f(x, 0.1) + y = rand(rng, fx) + + # Ensure that the 
elbo/dtc objective is close to the logpdf when appropriate.
+            @test approx_log_evidence(ApproxType(f(x)), fx, y) isa Real
+            @test approx_log_evidence(ApproxType(f(x)), fx, y) ≈ logpdf(fx, y)
+
+            if ApproxType === VFE
+                @test elbo(ApproxType(f(x)), fx, y) ==
+                    approx_log_evidence(ApproxType(f(x)), fx, y)
+                @test elbo(ApproxType(f(x .+ randn(rng, N_cond))), fx, y) < logpdf(fx, y)
+            end
+        end
+
+        @testset "Type Stability - $T" for T in [Float64, Float32]
+            x = collect(T, range(-1.0, 1.0; length=N_cond))
+            f = GP(T(0), SqExponentialKernel())
+            fx = f(x, T(0.1))
+            y = rand(rng, fx)
+            jitter = T(1e-12)
+
+            @test approx_log_evidence(ApproxType(f(x, jitter)), fx, y) isa T
+            ApproxType === VFE && @test elbo(ApproxType(f(x, jitter)), fx, y) isa T
+
+            post = posterior(ApproxType(f(x, jitter)), fx, y)
+            p_fx = post(x, jitter)
+
+            @test rand(rng, p_fx) isa Vector{T}
+            @test logpdf(p_fx, y) isa T
+        end
     end
 
-    @testset "Type Stability - $T" for T in [Float64, Float32]
-        x = collect(T, range(-1.0, 1.0; length=N_cond))
-        f = GP(T(0), SqExponentialKernel())
-        fx = f(x, T(0.1))
-        y = rand(rng, fx)
-        jitter = T(1e-12)
-
-        @test elbo(VFE(f(x, jitter)), fx, y) isa T
-        @test dtc(VFE(f(x, jitter)), fx, y) isa T
-
-        post = posterior(VFE(f(x, jitter)), fx, y)
-        p_fx = post(x, jitter)
-
-        @test rand(rng, p_fx) isa Vector{T}
-        @test logpdf(p_fx, y) isa T
-    end
-end
+    @testset "tr_Cf_invΣy" begin
+        N = 3
+        x = rand(N)
+        σ² = 1.234
+        Σy_Diagonal = Diagonal(Fill(σ², N))
+        Σy_ScalMat = ScalMat(N, σ²)
+        # Σy_dense = _to_psd(randn(N, N - 2)) # dense observation covariance not currently implemented for sparse approximation, and might be slower than exact inference anyway
+        f = GP(SqExponentialKernel())
 
-@testset "tr_Cf_invΣy" begin
-    N = 3
-    x = rand(N)
-    σ² = 1.234
-    Σy_Diagonal = Diagonal(Fill(σ², N))
-    Σy_ScalMat = ScalMat(N, σ²)
-    # Σy_dense = _to_psd(randn(N, N - 2)) # dense observation covariance not currently implemented for sparse approximation, and might be slower than exact inference anyways
-    f = GP(SqExponentialKernel())
-
-    for Σy in (Σy_Diagonal, Σy_ScalMat)
-        fx = f(x, Σy)
-        Cf = cov(f, x)
-        @test AbstractGPs.tr_Cf_invΣy(fx, Σy) ≈ tr(Cf / Matrix(Σy))
+        for Σy in (Σy_Diagonal, Σy_ScalMat)
+            fx = f(x, Σy)
+            Cf = cov(f, x)
+            @test AbstractGPs.tr_Cf_invΣy(fx, Σy) ≈ tr(Cf / Matrix(Σy))
+        end
     end
 end
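
Reviewer note: a minimal usage sketch of the API this diff settles on. It only exercises names the diff introduces or keeps (`posterior`, `approx_log_evidence`, `elbo`, `VFE`, `DTC`, and the unexported `ExactInference`); the kernel, data, and inducing-point locations below are made-up placeholders, not part of the change.

```julia
using AbstractGPs, Random

rng = MersenneTwister(0)
f = GP(SqExponentialKernel())    # prior over functions
x = randn(rng, 50)               # placeholder training inputs
y = rand(rng, f(x, 0.1))         # placeholder noisy observations
z = range(-2.0, 2.0; length=10)  # placeholder inducing-point locations

# Exact GP regression: the two-argument `posterior`, with `logpdf` as the
# exact log evidence. The `ExactInference` marker makes exact inference fit
# the same three-argument interface (it is not exported, hence the prefix).
f_exact = posterior(f(x, 0.1), y)
approx_log_evidence(AbstractGPs.ExactInference(), f(x, 0.1), y) == logpdf(f(x, 0.1), y)

# Sparse approximations: `VFE` and `DTC` share the same `posterior`;
# `approx_log_evidence` dispatches to the ELBO or the DTC objective.
for approx in (VFE(f(z)), DTC(f(z)))
    f_approx = posterior(approx, f(x, 0.1), y)
    @show approx_log_evidence(approx, f(x, 0.1), y)
end

# `elbo` remains a first-class alias for the VFE objective; `dtc(...)` and
# `elbo(::DTC, ...)` now go through `@deprecate` paths.
elbo(VFE(f(z)), f(x, 0.1), y) == approx_log_evidence(VFE(f(z)), f(x, 0.1), y)
```

The split keeps the posterior computation shared (both approximations go through `_compute_intermediates`), while `approx_log_evidence(vfe::VFE, ...)` subtracts the trace correction term from the DTC objective; that is the only behavioural difference between `VFE` and `DTC`.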