Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Initial work on CUDA-compat #25

Open
wants to merge 20 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,20 +16,24 @@ Requires = "ae029012-a4dd-5104-9daa-d747884805df"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"

[weakdeps]
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267"
Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"


[extensions]
NormalizingFlowsEnzymeExt = "Enzyme"
NormalizingFlowsForwardDiffExt = "ForwardDiff"
NormalizingFlowsReverseDiffExt = "ReverseDiff"
NormalizingFlowsZygoteExt = "Zygote"
NormalizingFlowsCUDAExt = "CUDA"

[compat]
ADTypes = "0.1"
Bijectors = "0.12.6, 0.13"
CUDA = "3, 4"
DiffResults = "1"
Distributions = "0.25"
DocStringExtensions = "0.9"
Expand All @@ -48,3 +52,4 @@ Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267"
Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
80 changes: 80 additions & 0 deletions ext/NormalizingFlowsCUDAExt.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
module NormalizingFlowsCUDAExt

using CUDA
using NormalizingFlows
using NormalizingFlows: Random, Distributions, Bijectors

# Private shorthand for the sampleable family handled by this extension:
# continuous, array-like variates.
const ContinuousArraySampleable = Distributions.Sampleable{
    <:Distributions.ArrayLikeVariate,Distributions.Continuous
}

# to enable `rand_device(rng::CUDA.RNG, dist[, num_samples])`
#
# NOTE: `n` stays `::Int` (not `::Integer`) on purpose. The fallback methods in
# `src/sample.jl` are declared with `rng::AbstractRNG, n::Int`; widening `n` here
# would make the two method families ambiguous for `(CUDA.RNG, dist, Int)` calls.

# Draw a single sample from `s` using the CUDA random number generator `rng`.
function NormalizingFlows.rand_device(rng::CUDA.RNG, s::ContinuousArraySampleable)
    return rand_cuda(rng, s)
end

# Draw `n` samples from `s` using the CUDA random number generator `rng`.
function NormalizingFlows.rand_device(rng::CUDA.RNG, s::ContinuousArraySampleable, n::Int)
    return rand_cuda(rng, s, n)
end

# Allocate an uninitialised `CuArray` of shape `size(s)` with element type
# `float(eltype(s))` and fill it in place through `Distributions.rand!`.
function rand_cuda(rng::CUDA.RNG, s::ContinuousArraySampleable)
    out = CuArray{float(eltype(s))}(undef, size(s))
    return Distributions.rand!(rng, Distributions.sampler(s), out)
end

# Same as the single-sample method, but the buffer has one extra trailing
# dimension of length `n`, so it holds `n` samples.
function rand_cuda(rng::CUDA.RNG, s::ContinuousArraySampleable, n::Int)
    out = CuArray{float(eltype(s))}(undef, size(s)..., n)
    return Distributions.rand!(rng, Distributions.sampler(s), out)
end

# NOTE(review): this is type piracy. Neither `Distributions._rand!` nor any of the
# argument types (`CUDA.RNG`, `MvNormal`, `CuVecOrMat`) is owned by this package.
# It is kept because `rand_cuda` above reaches this method through
# `Distributions.rand!`; the author's comment says it replaces an implementation
# that used scalar indexing. Revisit once this can live upstream.
function Distributions._rand!(rng::CUDA.RNG, d::Distributions.MvNormal, x::CuVecOrMat)
    # Fill `x` with standard-normal draws in place.
    Random.randn!(rng, x)
    # Apply the covariance `d.Σ` to the draws in place.
    Distributions.unwhiten!(d.Σ, x)
    # Shift by the mean; for a matrix `x` this broadcasts `d.μ` across columns.
    x .+= d.μ
    return x
end

# to enable `rand_device(rng::CUDA.RNG, flow[, num_samples])`

# Draw a single sample from the transformed distribution `td`.
function NormalizingFlows.rand_device(rng::CUDA.RNG, td::Bijectors.TransformedDistribution)
    return rand_cuda(rng, td)
end

# Draw `num_samples` samples from the transformed distribution `td`.
function NormalizingFlows.rand_device(
    rng::CUDA.RNG, td::Bijectors.TransformedDistribution, num_samples::Int
)
    return rand_cuda(rng, td, num_samples)
end

# Sample the base distribution `td.dist`, then push the draw through `td.transform`.
function rand_cuda(rng::CUDA.RNG, td::Bijectors.TransformedDistribution)
    return td.transform(rand_cuda(rng, td.dist))
end

# Sample `num_samples` draws from the base distribution, apply `td.transform` to
# each slice along the second axis separately, and concatenate the results
# horizontally.
function rand_cuda(rng::CUDA.RNG, td::Bijectors.TransformedDistribution, num_samples::Int)
    samples = rand_cuda(rng, td.dist, num_samples)
    transformed = map(axes(samples, 2)) do i
        return td.transform(view(samples, :, i))
    end
    return reduce(hcat, transformed)
end

end
5 changes: 5 additions & 0 deletions src/NormalizingFlows.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ using ADTypes, DiffResults
using DocStringExtensions

export train_flow, elbo, loglikelihood, value_and_gradient!
export rand_device

using ADTypes
using DiffResults
Expand Down Expand Up @@ -72,6 +73,7 @@ function train_flow(
end

include("train.jl")
include("sample.jl")
include("objectives.jl")

# optional dependencies
Expand All @@ -94,6 +96,9 @@ function __init__()
@require Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" include(
"../ext/NormalizingFlowsZygoteExt.jl"
)
@require CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" include(
"../ext/NormalizingFlowsCUDAExt.jl"
)
end
end
end
4 changes: 2 additions & 2 deletions src/objectives/elbo.jl
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,11 @@ function elbo(flow::Bijectors.MultivariateTransformed, logp, xs::AbstractMatrix)
end

# Draw `n_samples` samples from the base distribution `flow.dist` with `rng` and
# forward them to the sample-based `elbo(flow, logp, xs)` method.
function elbo(rng::AbstractRNG, flow::Bijectors.MultivariateTransformed, logp, n_samples)
    # Sampling goes through `rand_device` so the method chosen depends on the type
    # of `rng`. The earlier `rand(rng, ...)` return preceded this line and made it
    # unreachable.
    return elbo(flow, logp, rand_device(rng, flow.dist, n_samples))
end

# Univariate counterpart: draw `n_samples` samples from `flow.dist` with `rng` and
# forward them to the sample-based `elbo(flow, logp, xs)` method.
function elbo(rng::AbstractRNG, flow::Bijectors.UnivariateTransformed, logp, n_samples)
    # Sampling goes through `rand_device` so the method chosen depends on the type
    # of `rng`. The earlier `rand(rng, ...)` return preceded this line and made it
    # unreachable.
    return elbo(flow, logp, rand_device(rng, flow.dist, n_samples))
end

function elbo(flow::Bijectors.TransformedDistribution, logp, n_samples)
Expand Down
69 changes: 69 additions & 0 deletions src/sample.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# This file defines `rand_device`, which samples from a distribution or from a
# flow (a `Bijectors.TransformedDistribution`).
# Its main purpose is to make sampling from a distribution work on the GPU.

# function rand_device(
# rng::AbstractRNG,
# s::Distributions.Sampleable{<:Distributions.ArrayLikeVariate,Distributions.Continuous},
# )
# if !(rng isa CUDA.RNG)
# return Distributions.rand(rng, s)
# else
# return rand_cuda(rng, s)
# end
# end

# Fallback for any `AbstractRNG`: draw one sample from `s` by forwarding to
# `Distributions.rand(rng, s)`.
function rand_device(
    rng::AbstractRNG, s::Distributions.Sampleable{F,Distributions.Continuous}
) where {F<:Distributions.ArrayLikeVariate}
    draw = Distributions.rand(rng, s)
    return draw
end

# Fallback for any `AbstractRNG`: draw `n` samples from `s` by forwarding to
# `Distributions.rand(rng, s, n)`.
function rand_device(
    rng::AbstractRNG, s::Distributions.Sampleable{F,Distributions.Continuous}, n::Int
) where {F<:Distributions.ArrayLikeVariate}
    draws = Distributions.rand(rng, s, n)
    return draws
end

# function rand_device(
# rng::AbstractRNG,
# s::Distributions.Sampleable{Distributions.Multivariate,Distributions.Continuous},
# n::Int,
# )
# if !(rng isa CUDA.RNG)
# return Distributions.rand(rng, s, n)
# else
# return rand_cuda(rng, s, n)
# end
# end

#########################
# for Bijectors.jl
##########################

# Fallback for any `AbstractRNG`: draw one sample from the transformed
# distribution `td` by forwarding to `Distributions.rand(rng, td)`.
rand_device(rng::AbstractRNG, td::Bijectors.TransformedDistribution) =
    Distributions.rand(rng, td)

# Fallback for any `AbstractRNG`: draw `n` samples from the transformed
# distribution `td` by forwarding to `Distributions.rand(rng, td, n)`.
rand_device(rng::AbstractRNG, td::Bijectors.TransformedDistribution, n::Int) =
    Distributions.rand(rng, td, n)

# function rand_device(rng::AbstractRNG, td::Bijectors.TransformedDistribution)
# if !(rng isa CUDA.RNG)
# return Distributions.rand(rng, td)
# else
# return rand_cuda(rng, td)
# end
# end

# function rand_device(rng::AbstractRNG, td::Bijectors.TransformedDistribution, n::Int)
# if !(rng isa CUDA.RNG)
# return Distributions.rand(rng, td, n)
# else
# return rand_cuda(rng, td, n)
# end
# end
3 changes: 2 additions & 1 deletion src/train.jl
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ function optimize(
opt_stats = []

θ = copy(θ₀)
diff_result = DiffResults.GradientResult(θ)
diff_result = DiffResults.DiffResult(zero(eltype(θ)), similar(θ))
# initialise optimiser state
st = Optimisers.setup(optimiser, θ)

Expand All @@ -140,6 +140,7 @@ function optimize(
# Save stats
ls = DiffResults.value(diff_result)
g = DiffResults.gradient(diff_result)

stat = (iteration=i, loss=ls, gradient_norm=norm(g))
push!(opt_stats, stat)

Expand Down
15 changes: 15 additions & 0 deletions test/Project.toml
Original file line number Diff line number Diff line change
@@ -1,13 +1,28 @@
[deps]
ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
Bijectors = "76274a88-744f-5084-9051-94815aaf08c4"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
DiffResults = "163ba53b-c6d8-5494-b064-1a9d43ac40c5"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"
Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Optimisers = "3bd65402-5787-11e9-1adc-39752487f4e2"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"

[compat]
ADTypes = "0.1"
Bijectors = "0.12.6, 0.13"
CUDA = "3, 4"
DiffResults = "1"
Distributions = "0.25"
Enzyme = "0.11"
ForwardDiff = "0.10.25"
Optimisers = "0.2.16"
ReverseDiff = "1.14"
Zygote = "0.6"
Flux = "0.13, 0.14"
2 changes: 1 addition & 1 deletion test/ad.jl
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ end
target = MvNormal(μ, Σ)
logp(z) = logpdf(target, z)

q₀ = MvNormal(zeros(T, 2), ones(T, 2))
q₀ = MvNormal(zeros(T, 2), I)
flow = Bijectors.transformed(q₀, Bijectors.Shift(zero.(μ)))

sample_per_iter = 10
Expand Down
31 changes: 31 additions & 0 deletions test/cuda.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
using CUDA, Test, LinearAlgebra, Distributions
using Flux

# GPU sampling tests; skipped entirely when no functional CUDA device is available.
if CUDA.functional()
    @testset "rand with CUDA" begin
        dists = [
            MvNormal(CUDA.zeros(2), I), MvNormal(CUDA.zeros(2), cu([1.0 0.5; 0.5 1.0]))
        ]

        # Sampling directly from a distribution whose parameters are CuArrays.
        @testset "distribution: $dist" for dist in dists
            x = rand_device(CUDA.default_rng(), dist)
            xs = rand_device(CUDA.default_rng(), dist, 100)
            @info logpdf(dist, x)
            @test x isa CuArray
            @test xs isa CuArray
        end

        # Sampling from a flow built on top of each base distribution.
        @testset "flow on: $dist" for dist in dists
            # Scope scalar indexing to this block instead of flipping the global
            # `CUDA.allowscalar(true)` switch, which would stay on for every test
            # file that runs afterwards.
            CUDA.@allowscalar begin
                ts = reduce(∘, [Bijectors.PlanarLayer(2) for _ in 1:2])
                ts_g = gpu(ts)
                flow = Bijectors.transformed(dist, ts_g)

                y = rand_device(CUDA.default_rng(), flow)
                ys = rand_device(CUDA.default_rng(), flow, 100)
                @info logpdf(flow, y)
                @test y isa CuArray
                @test ys isa CuArray
            end
        end
    end
end
4 changes: 2 additions & 2 deletions test/interface.jl
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
target = MvNormal(μ, Σ)
logp(z) = logpdf(target, z)

q₀ = MvNormal(zeros(T, 2), ones(T, 2))
q₀ = MvNormal(zeros(T, 2), I)
flow = Bijectors.transformed(
q₀, Bijectors.Shift(zero.(μ)) ∘ Bijectors.Scale(ones(T, 2))
)
Expand All @@ -27,7 +27,7 @@
logp,
sample_per_iter;
max_iters=5_000,
optimiser=Optimisers.ADAM(0.01 * one(T)),
optimiser=Optimisers.Adam(0.01 * one(T)),
ADbackend=adtype,
show_progress=false,
callback=cb,
Expand Down
2 changes: 1 addition & 1 deletion test/objectives.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
target = MvNormal(μ, Σ)
logp(z) = logpdf(target, z)

q₀ = MvNormal(zeros(T, 2), ones(T, 2))
q₀ = MvNormal(zeros(T, 2), I)
flow = Bijectors.transformed(q₀, Bijectors.Shift(μ) ∘ Bijectors.Scale(sqrt.(Σ)))

x = randn(T, 2)
Expand Down
1 change: 1 addition & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ using ADTypes, DiffResults
using ForwardDiff, Zygote, Enzyme, ReverseDiff
using Test

include("cuda.jl")
include("ad.jl")
include("objectives.jl")
include("interface.jl")