mlcolab · sethaxen · Jun 12, 2022 · Jun 12, 2022 · Jun 12, 2022 · Jun 12, 2022
diff --git a/src/elbo.jl b/src/elbo.jl
@@ -1,31 +1,80 @@
-function maximize_elbo(rng, logp, dists, ndraws, executor)
+"""
+    MaximumELBO(; save_draws=false, rng, ndraws, executor)
+
+An optimizer over the trace of fit distributions that returns the ELBO-maximizing distribution.
+
+The ELBO is approximated using Monte Carlo sampling with `ndraws` and the provided `rng`.
+This draws can be reused by Pathfinder to avoid extra log-density evaluations. To enable
+this, set `save_draws=true`.
+
+`executors` is a Transducers.jl executor that determines if and how to perform ELBO
+computation in parallel. The default (`Transducers.SequentialEx()`) performs no
+parallelization. If `rng` is known to be thread-safe, and the log-density function is known
+to have no internal state, then `Transducers.PreferParallel()` may be used to parallelize
+log-density evaluation. This is generally only faster for expensive log density functions.
+"""
+struct MaximumELBO{save_draws,R,E}
+    rng::R
+    ndraws::Int
+    executor::E
+end
+function MaximumELBO(;
+    rng::Random.AbstractRNG=Random.GLOBAL_RNG,
+    ndraws::Int=DEFAULT_NDRAWS_ELBO,
+    executor=Transducers.SequentialEx(),
+    save_draws::Bool=false,
+)
+    return MaximumELBO{save_draws,typeof(rng),typeof(executor)}(rng, ndraws, executor)
+end
+
+function (optimizer::MaximumELBO{save_draws})(logp, _, _, dists) where {save_draws}
+    rng = optimizer.rng
+    ndraws = optimizer.ndraws
+    executor = optimizer.executor
     EE = Core.Compiler.return_type(
-        elbo_and_samples, Tuple{typeof(rng),typeof(logp),eltype(dists),Int}
+        _compute_elbo, Tuple{typeof(rng),typeof(logp),eltype(dists),Int,Val{save_draws}}
     )
-    estimates = similar(dists, EE)
-    isempty(estimates) && return 0, estimates
-    Folds.map!(estimates, dists, executor) do dist
-        return elbo_and_samples(rng, logp, dist, ndraws)
+    dists_not_init = @views dists[(begin + 1):end]
+    estimates = similar(dists_not_init, EE)
+    isempty(estimates) && return false, first(dists), 0, estimates
+    Folds.map!(estimates, dists_not_init, executor) do dist
+        return _compute_elbo(rng, logp, dist, ndraws, Val(save_draws))
     end
     _, iteration_opt = _findmax(estimates |> Transducers.Map(est -> est.value))
-    return iteration_opt, estimates
+    elbo_opt = estimates[iteration_opt].value
+    success = !isnan(elbo_opt) & (elbo_opt != -Inf)
+    return success, dists[iteration_opt + 1], iteration_opt, estimates
 end
 
-function elbo_and_samples(rng, logp, dist, ndraws)
+function _compute_elbo(rng, logp, dist, ndraws, ::Val{save_draws}) where {save_draws}
     ϕ, logqϕ = rand_and_logpdf(rng, dist, ndraws)
     logpϕ = similar(logqϕ)
     logpϕ .= logp.(eachcol(ϕ))
     logr = logpϕ - logqϕ
     elbo = Statistics.mean(logr)
     elbo_se = sqrt(Statistics.var(logr) / length(logr))
-    return ELBOEstimate(elbo, elbo_se, ϕ, logpϕ, logqϕ, logr)
+    return ELBOEstimate(elbo, elbo_se, save_draws ? ϕ : nothing, logpϕ, logqϕ, logr)
 end
 
+"""
+    ELBOEstimate
+
+Container of results of ELBO estimation via Monte Carlo.
+
+# Fields
+- `value`: value of estimate
+- `std_err`: Monte Carlo standard error of estimate
+- `draws`: Draws used to compute estimate
+- `log_densities_actual`: log density of actual distribution evaluated on `draws`.
+- `log_densities_fit`: log density of fit distribution evaluated on `draws`.
+- `log_density_ratios`: log of ratio of actual to fit density. `value` is the mean of this
+    array.
+"""
 struct ELBOEstimate{T,P,L<:AbstractVector{T}}
     value::T
     std_err::T
     draws::P
-    log_densities_target::L
+    log_densities_actual::L
     log_densities_fit::L
     log_density_ratios::L
 end
@@ -38,3 +87,16 @@ end
 function _to_string(est::ELBOEstimate; digits=2)
     return "$(round(est.value; digits)) ± $(round(est.std_err; digits))"
 end
+
+"""
+    draws_from_fit_stats(fit_stats, fit_iteration) -> draws
+
+If applicable, return draws the fit distribution from `fit_iteration` stored in `fit_stats`.
+
+The draws must be the same type and layout as one would get by calling
+`rand(fit_distribution)`.
+"""
+function draws_from_fit_stats end
+
+draws_from_fit_stats(fit_stats, fit_iteration) = nothing
+draws_from_fit_stats(estimates::AbstractVector{<:ELBOEstimate}, i::Int) = estimates[i].draws
diff --git a/src/multipath.jl b/src/multipath.jl
@@ -11,8 +11,8 @@ Container for results of multi-path Pathfinder.
 - `optim_prob::SciMLBase.OptimizationProblem`: Otimization problem used for
     optimization
 - `logp`: Log-density function
-- `fit_distribution::Distributions.MixtureModel`: uniformly-weighted mixture of ELBO-
-    maximizing multivariate normal distributions from each run.
+- `fit_distribution::Distributions.MixtureModel`: uniformly-weighted mixture of
+    multivariate normal distributions returned by each run.
 - `draws::AbstractMatrix{<:Real}`: draws from `fit_distribution` with size `(dim, ndraws)`,
     potentially resampled using importance resampling to be closer to the target
     distribution.
@@ -116,9 +116,6 @@ for approximating expectations with respect to ``p``.
     to run the single-path runs in parallel. If `rng` is known to be thread-safe, the
     default is `Transducers.PreferParallel()` (parallel executation, defaulting to
     multi-threading). Otherwise, it is `Transducers.SequentialEx()` (no parallelization).
-- `executor_per_run::Transducers.Executor=Transducers.SequentialEx()`: Transducers.jl
-    executor used within each run to parallelize PRNG calls. Defaults to no parallelization.
-    See [`pathfinder`](@ref) for a description.
 - `kwargs...` : Remaining keywords are forwarded to [`pathfinder`](@ref).
 
 # Returns
@@ -149,13 +146,11 @@ function multipathfinder(
     init=nothing,
     input=optim_fun,
     nruns::Int=init === nothing ? -1 : length(init),
-    ndraws_elbo::Int=DEFAULT_NDRAWS_ELBO,
-    ndraws_per_run::Int=max(ndraws_elbo, cld(ndraws, max(nruns, 1))),
+    ndraws_per_run::Int=cld(ndraws, max(nruns, 1)),
     rng::Random.AbstractRNG=Random.GLOBAL_RNG,
     history_length::Int=DEFAULT_HISTORY_LENGTH,
     optimizer=default_optimizer(history_length),
     executor::Transducers.Executor=_default_executor(rng),
-    executor_per_run=Transducers.SequentialEx(),
     importance::Bool=true,
     kwargs...,
 )
@@ -184,8 +179,6 @@ function multipathfinder(
             optimizer,
             ndraws=ndraws_per_run,
             init=init_i,
-            executor=executor_per_run,
-            ndraws_elbo,
             kwargs...,
         )
     end