TuringLang · torfjelde · Aug 10, 2023 · Aug 10, 2023 · Aug 10, 2023 · Aug 15, 2023
diff --git a/Project.toml b/Project.toml
@@ -16,20 +16,24 @@ Requires = "ae029012-a4dd-5104-9daa-d747884805df"
 StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
 
 [weakdeps]
+CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267"
 Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
 
+
 [extensions]
 NormalizingFlowsEnzymeExt = "Enzyme"
 NormalizingFlowsForwardDiffExt = "ForwardDiff"
 NormalizingFlowsReverseDiffExt = "ReverseDiff"
 NormalizingFlowsZygoteExt = "Zygote"
+NormalizingFlowsCUDAExt = "CUDA"
 
 [compat]
 ADTypes = "0.1"
 Bijectors = "0.12.6, 0.13"
+CUDA = "3, 4"
 DiffResults = "1"
 Distributions = "0.25"
 DocStringExtensions = "0.9"
@@ -48,3 +52,4 @@ Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267"
 Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
+CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
diff --git a/example/Project.toml b/example/Project.toml
@@ -15,3 +15,4 @@ Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Revise = "295af30f-e4ad-537b-8983-00126c2a3abe"
 Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
+cuDNN = "02a925ec-e4fe-4b08-9a7e-0d78e3d38ccd"
diff --git a/ext/NormalizingFlowsCUDAExt.jl b/ext/NormalizingFlowsCUDAExt.jl
@@ -0,0 +1,49 @@
+# Package extension loaded alongside CUDA.jl: adds `Distributions` methods for
+# `CUDA.RNG` / `CuArray` arguments so samples are allocated on the GPU and scalar
+# indexing is avoided.
+module NormalizingFlowsCUDAExt
+
+using CUDA
+using NormalizingFlows: Random, Distributions
+
+# Draw a single sample from `s`, allocating the output as a `CuArray` of shape `size(s)`
+# with element type `float(eltype(s))` so the result lives on the GPU.
+function Distributions.rand(
+    rng::CUDA.RNG,
+    s::Distributions.Sampleable{<:Distributions.ArrayLikeVariate,Distributions.Continuous},
+)
+    out = CuArray{float(eltype(s))}(undef, size(s))
+    return @inbounds Distributions.rand!(rng, Distributions.sampler(s), out)
+end
+
+# Draw `n` samples from `s` into a `length(s) × n` `CuArray`.
+# NOTE(review): the buffer is always 2-dimensional, which presumably only fits
+# vector-valued `s` — confirm before relying on this for matrix-variate samplers.
+function Distributions.rand(
+    rng::CUDA.RNG,
+    s::Distributions.Sampleable{<:Distributions.ArrayLikeVariate,Distributions.Continuous},
+    n::Int,
+)
+    out = CuArray{float(eltype(s))}(undef, length(s), n)
+    return @inbounds Distributions.rand!(rng, Distributions.sampler(s), out)
+end
+
+# Fill `x` in place with draws from `d` using only whole-array operations (no scalar
+# indexing): standard-normal noise, then `unwhiten!` with `d.Σ`, then a shift by `d.μ`.
+function Distributions._rand!(rng::CUDA.RNG, d::Distributions.MvNormal, x::CuVecOrMat)
+    # Call `randn!` through `Random`, the module that owns the generic function.
+    Random.randn!(rng, x)
+    Distributions.unwhiten!(d.Σ, x)
+    x .+= d.μ
+    return x
+end
+
+# Return `true` iff every entry of `x` lies strictly between 0 and `Inf`.
+function Distributions.insupport(
+    ::Type{D}, x::CuVector{T}
+) where {T<:Real,D<:Distributions.AbstractMvLogNormal}
+    # A predicate reduction avoids materialising a temporary Bool array on the device.
+    return all(xi -> 0 < xi < Inf, x)
+end
+
+end
diff --git a/src/NormalizingFlows.jl b/src/NormalizingFlows.jl
@@ -94,6 +94,9 @@ function __init__()
         @require Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" include(
             "../ext/NormalizingFlowsZygoteExt.jl"
         )
+        @require CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" include(
+            "../ext/NormalizingFlowsCUDAExt.jl"
+        )
     end
 end
 end
diff --git a/test/Project.toml b/test/Project.toml
@@ -1,6 +1,7 @@
 [deps]
 ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
 Bijectors = "76274a88-744f-5084-9051-94815aaf08c4"
+CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 DiffResults = "163ba53b-c6d8-5494-b064-1a9d43ac40c5"
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
 Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"

diff --git a/test/ad.jl b/test/ad.jl
@@ -33,7 +33,7 @@ end
             target = MvNormal(μ, Σ)
             logp(z) = logpdf(target, z)
 
-            q₀ = MvNormal(zeros(T, 2), ones(T, 2))
+            q₀ = MvNormal(zeros(T, 2), I)
             flow = Bijectors.transformed(q₀, Bijectors.Shift(zero.(μ)))
 
             sample_per_iter = 10

diff --git a/test/cuda.jl b/test/cuda.jl
@@ -0,0 +1,18 @@
+using CUDA, Test, LinearAlgebra, Distributions
+# Smoke test: `rand` with the CUDA default RNG should return a `CuArray`.
+if CUDA.functional()  # the whole testset is skipped when CUDA is not functional
+    @testset "rand with CUDA" begin
+        dists = [
+            MvNormal(CUDA.zeros(2), I),
+            MvNormal(CUDA.zeros(2), cu([1.0 0.5; 0.5 1.0])),
+            MvLogNormal(CUDA.zeros(2), I),
+            MvLogNormal(CUDA.zeros(2), cu([1.0 0.5; 0.5 1.0])),
+        ]
+        # One nested testset per distribution, labelled with the distribution itself.
+        @testset "$dist" for dist in dists
+            x = rand(CUDA.default_rng(), dist)
+            @info logpdf(dist, x)  # logged only; the value is not asserted on
+            @test x isa CuArray
+        end
+    end
+end
diff --git a/test/interface.jl b/test/interface.jl
@@ -13,7 +13,7 @@
             target = MvNormal(μ, Σ)
             logp(z) = logpdf(target, z)
 
-            q₀ = MvNormal(zeros(T, 2), ones(T, 2))
+            q₀ = MvNormal(zeros(T, 2), I)
             flow = Bijectors.transformed(
                 q₀, Bijectors.Shift(zero.(μ)) ∘ Bijectors.Scale(ones(T, 2))
             )
@@ -27,7 +27,7 @@
                 logp,
                 sample_per_iter;
                 max_iters=5_000,
-                optimiser=Optimisers.ADAM(0.01 * one(T)),
+                optimiser=Optimisers.Adam(0.01 * one(T)),
                 ADbackend=adtype,
                 show_progress=false,
                 callback=cb,

diff --git a/test/objectives.jl b/test/objectives.jl
@@ -5,7 +5,7 @@
         target = MvNormal(μ, Σ)
         logp(z) = logpdf(target, z)
 
-        q₀ = MvNormal(zeros(T, 2), ones(T, 2))
+        q₀ = MvNormal(zeros(T, 2), I)
         flow = Bijectors.transformed(q₀, Bijectors.Shift(μ) ∘ Bijectors.Scale(sqrt.(Σ)))
 
         x = randn(T, 2)

diff --git a/test/runtests.jl b/test/runtests.jl
@@ -7,6 +7,7 @@ using ADTypes, DiffResults
 using ForwardDiff, Zygote, Enzyme, ReverseDiff
 using Test
 
+include("cuda.jl")
 include("ad.jl")
 include("objectives.jl")
 include("interface.jl")