diff --git a/lib/cublas/CUBLAS.jl b/lib/cublas/CUBLAS.jl index 98f11071a0..30ef9e4df5 100644 --- a/lib/cublas/CUBLAS.jl +++ b/lib/cublas/CUBLAS.jl @@ -104,6 +104,7 @@ function handle() end cublasSetStream_v2(new_handle, cuda.stream) + cublasSetPointerMode_v2(new_handle, CUBLAS_POINTER_MODE_DEVICE) math_mode!(new_handle, cuda.math_mode) (; handle=new_handle, cuda.stream, cuda.math_mode) diff --git a/lib/cublas/libcublas.jl b/lib/cublas/libcublas.jl index 82effffa67..530145d5f6 100644 --- a/lib/cublas/libcublas.jl +++ b/lib/cublas/libcublas.jl @@ -100,35 +100,35 @@ end initialize_context() @gcsafe_ccall libcublas.cublasSnrm2_v2(handle::cublasHandle_t, n::Cint, x::CuPtr{Cfloat}, incx::Cint, - result::RefOrCuRef{Cfloat})::cublasStatus_t + result::CuRef{Cfloat})::cublasStatus_t end @checked function cublasDnrm2_v2(handle, n, x, incx, result) initialize_context() @gcsafe_ccall libcublas.cublasDnrm2_v2(handle::cublasHandle_t, n::Cint, x::CuPtr{Cdouble}, incx::Cint, - result::RefOrCuRef{Cdouble})::cublasStatus_t + result::CuRef{Cdouble})::cublasStatus_t end @checked function cublasScnrm2_v2(handle, n, x, incx, result) initialize_context() @gcsafe_ccall libcublas.cublasScnrm2_v2(handle::cublasHandle_t, n::Cint, x::CuPtr{cuComplex}, incx::Cint, - result::RefOrCuRef{Cfloat})::cublasStatus_t + result::CuRef{Cfloat})::cublasStatus_t end @checked function cublasDznrm2_v2(handle, n, x, incx, result) initialize_context() @gcsafe_ccall libcublas.cublasDznrm2_v2(handle::cublasHandle_t, n::Cint, x::CuPtr{cuDoubleComplex}, incx::Cint, - result::RefOrCuRef{Cdouble})::cublasStatus_t + result::CuRef{Cdouble})::cublasStatus_t end @checked function cublasSdot_v2(handle, n, x, incx, y, incy, result) initialize_context() @gcsafe_ccall libcublas.cublasSdot_v2(handle::cublasHandle_t, n::Cint, x::CuPtr{Cfloat}, incx::Cint, y::CuPtr{Cfloat}, incy::Cint, - result::RefOrCuRef{Cfloat})::cublasStatus_t + result::CuRef{Cfloat})::cublasStatus_t end @checked function cublasDdot_v2(handle, n, x, incx, 
y, incy, result) @@ -136,7 +136,7 @@ end @gcsafe_ccall libcublas.cublasDdot_v2(handle::cublasHandle_t, n::Cint, x::CuPtr{Cdouble}, incx::Cint, y::CuPtr{Cdouble}, incy::Cint, - result::RefOrCuRef{Cdouble})::cublasStatus_t + result::CuRef{Cdouble})::cublasStatus_t end @checked function cublasCdotu_v2(handle, n, x, incx, y, incy, result) @@ -144,7 +144,7 @@ end @gcsafe_ccall libcublas.cublasCdotu_v2(handle::cublasHandle_t, n::Cint, x::CuPtr{cuComplex}, incx::Cint, y::CuPtr{cuComplex}, incy::Cint, - result::RefOrCuRef{cuComplex})::cublasStatus_t + result::CuRef{cuComplex})::cublasStatus_t end @checked function cublasCdotc_v2(handle, n, x, incx, y, incy, result) @@ -152,7 +152,7 @@ end @gcsafe_ccall libcublas.cublasCdotc_v2(handle::cublasHandle_t, n::Cint, x::CuPtr{cuComplex}, incx::Cint, y::CuPtr{cuComplex}, incy::Cint, - result::RefOrCuRef{cuComplex})::cublasStatus_t + result::CuRef{cuComplex})::cublasStatus_t end @checked function cublasZdotu_v2(handle, n, x, incx, y, incy, result) @@ -160,7 +160,7 @@ end @gcsafe_ccall libcublas.cublasZdotu_v2(handle::cublasHandle_t, n::Cint, x::CuPtr{cuDoubleComplex}, incx::Cint, y::CuPtr{cuDoubleComplex}, incy::Cint, - result::RefOrCuRef{cuDoubleComplex})::cublasStatus_t + result::CuRef{cuDoubleComplex})::cublasStatus_t end @checked function cublasZdotc_v2(handle, n, x, incx, y, incy, result) @@ -168,41 +168,41 @@ end @gcsafe_ccall libcublas.cublasZdotc_v2(handle::cublasHandle_t, n::Cint, x::CuPtr{cuDoubleComplex}, incx::Cint, y::CuPtr{cuDoubleComplex}, incy::Cint, - result::RefOrCuRef{cuDoubleComplex})::cublasStatus_t + result::CuRef{cuDoubleComplex})::cublasStatus_t end @checked function cublasSscal_v2(handle, n, alpha, x, incx) initialize_context() @gcsafe_ccall libcublas.cublasSscal_v2(handle::cublasHandle_t, n::Cint, - alpha::RefOrCuRef{Cfloat}, x::CuPtr{Cfloat}, + alpha::CuRef{Cfloat}, x::CuPtr{Cfloat}, incx::Cint)::cublasStatus_t end @checked function cublasDscal_v2(handle, n, alpha, x, incx) initialize_context() 
@gcsafe_ccall libcublas.cublasDscal_v2(handle::cublasHandle_t, n::Cint, - alpha::RefOrCuRef{Cdouble}, x::CuPtr{Cdouble}, + alpha::CuRef{Cdouble}, x::CuPtr{Cdouble}, incx::Cint)::cublasStatus_t end @checked function cublasCscal_v2(handle, n, alpha, x, incx) initialize_context() @gcsafe_ccall libcublas.cublasCscal_v2(handle::cublasHandle_t, n::Cint, - alpha::RefOrCuRef{cuComplex}, + alpha::CuRef{cuComplex}, x::CuPtr{cuComplex}, incx::Cint)::cublasStatus_t end @checked function cublasCsscal_v2(handle, n, alpha, x, incx) initialize_context() @gcsafe_ccall libcublas.cublasCsscal_v2(handle::cublasHandle_t, n::Cint, - alpha::RefOrCuRef{Cfloat}, x::CuPtr{cuComplex}, + alpha::CuRef{Cfloat}, x::CuPtr{cuComplex}, incx::Cint)::cublasStatus_t end @checked function cublasZscal_v2(handle, n, alpha, x, incx) initialize_context() @gcsafe_ccall libcublas.cublasZscal_v2(handle::cublasHandle_t, n::Cint, - alpha::RefOrCuRef{cuDoubleComplex}, + alpha::CuRef{cuDoubleComplex}, x::CuPtr{cuDoubleComplex}, incx::Cint)::cublasStatus_t end @@ -210,7 +210,7 @@ end @checked function cublasZdscal_v2(handle, n, alpha, x, incx) initialize_context() @gcsafe_ccall libcublas.cublasZdscal_v2(handle::cublasHandle_t, n::Cint, - alpha::RefOrCuRef{Cdouble}, + alpha::CuRef{Cdouble}, x::CuPtr{cuDoubleComplex}, incx::Cint)::cublasStatus_t end @@ -218,7 +218,7 @@ end @checked function cublasSaxpy_v2(handle, n, alpha, x, incx, y, incy) initialize_context() @gcsafe_ccall libcublas.cublasSaxpy_v2(handle::cublasHandle_t, n::Cint, - alpha::RefOrCuRef{Cfloat}, x::CuPtr{Cfloat}, + alpha::CuRef{Cfloat}, x::CuPtr{Cfloat}, incx::Cint, y::CuPtr{Cfloat}, incy::Cint)::cublasStatus_t end @@ -226,7 +226,7 @@ end @checked function cublasDaxpy_v2(handle, n, alpha, x, incx, y, incy) initialize_context() @gcsafe_ccall libcublas.cublasDaxpy_v2(handle::cublasHandle_t, n::Cint, - alpha::RefOrCuRef{Cdouble}, x::CuPtr{Cdouble}, + alpha::CuRef{Cdouble}, x::CuPtr{Cdouble}, incx::Cint, y::CuPtr{Cdouble}, incy::Cint)::cublasStatus_t end 
@@ -234,7 +234,7 @@ end @checked function cublasCaxpy_v2(handle, n, alpha, x, incx, y, incy) initialize_context() @gcsafe_ccall libcublas.cublasCaxpy_v2(handle::cublasHandle_t, n::Cint, - alpha::RefOrCuRef{cuComplex}, + alpha::CuRef{cuComplex}, x::CuPtr{cuComplex}, incx::Cint, y::CuPtr{cuComplex}, incy::Cint)::cublasStatus_t end @@ -242,7 +242,7 @@ end @checked function cublasZaxpy_v2(handle, n, alpha, x, incx, y, incy) initialize_context() @gcsafe_ccall libcublas.cublasZaxpy_v2(handle::cublasHandle_t, n::Cint, - alpha::RefOrCuRef{cuDoubleComplex}, + alpha::CuRef{cuDoubleComplex}, x::CuPtr{cuDoubleComplex}, incx::Cint, y::CuPtr{cuDoubleComplex}, incy::Cint)::cublasStatus_t @@ -310,100 +310,100 @@ end initialize_context() @gcsafe_ccall libcublas.cublasIsamax_v2(handle::cublasHandle_t, n::Cint, x::CuPtr{Cfloat}, incx::Cint, - result::RefOrCuRef{Cint})::cublasStatus_t + result::CuRef{Cint})::cublasStatus_t end @checked function cublasIdamax_v2(handle, n, x, incx, result) initialize_context() @gcsafe_ccall libcublas.cublasIdamax_v2(handle::cublasHandle_t, n::Cint, x::CuPtr{Cdouble}, incx::Cint, - result::RefOrCuRef{Cint})::cublasStatus_t + result::CuRef{Cint})::cublasStatus_t end @checked function cublasIcamax_v2(handle, n, x, incx, result) initialize_context() @gcsafe_ccall libcublas.cublasIcamax_v2(handle::cublasHandle_t, n::Cint, x::CuPtr{cuComplex}, incx::Cint, - result::RefOrCuRef{Cint})::cublasStatus_t + result::CuRef{Cint})::cublasStatus_t end @checked function cublasIzamax_v2(handle, n, x, incx, result) initialize_context() @gcsafe_ccall libcublas.cublasIzamax_v2(handle::cublasHandle_t, n::Cint, x::CuPtr{cuDoubleComplex}, incx::Cint, - result::RefOrCuRef{Cint})::cublasStatus_t + result::CuRef{Cint})::cublasStatus_t end @checked function cublasIsamin_v2(handle, n, x, incx, result) initialize_context() @gcsafe_ccall libcublas.cublasIsamin_v2(handle::cublasHandle_t, n::Cint, x::CuPtr{Cfloat}, incx::Cint, - result::RefOrCuRef{Cint})::cublasStatus_t + 
result::CuRef{Cint})::cublasStatus_t end @checked function cublasIdamin_v2(handle, n, x, incx, result) initialize_context() @gcsafe_ccall libcublas.cublasIdamin_v2(handle::cublasHandle_t, n::Cint, x::CuPtr{Cdouble}, incx::Cint, - result::RefOrCuRef{Cint})::cublasStatus_t + result::CuRef{Cint})::cublasStatus_t end @checked function cublasIcamin_v2(handle, n, x, incx, result) initialize_context() @gcsafe_ccall libcublas.cublasIcamin_v2(handle::cublasHandle_t, n::Cint, x::CuPtr{cuComplex}, incx::Cint, - result::RefOrCuRef{Cint})::cublasStatus_t + result::CuRef{Cint})::cublasStatus_t end @checked function cublasIzamin_v2(handle, n, x, incx, result) initialize_context() @gcsafe_ccall libcublas.cublasIzamin_v2(handle::cublasHandle_t, n::Cint, x::CuPtr{cuDoubleComplex}, incx::Cint, - result::RefOrCuRef{Cint})::cublasStatus_t + result::CuRef{Cint})::cublasStatus_t end @checked function cublasSasum_v2(handle, n, x, incx, result) initialize_context() @gcsafe_ccall libcublas.cublasSasum_v2(handle::cublasHandle_t, n::Cint, x::CuPtr{Cfloat}, incx::Cint, - result::RefOrCuRef{Cfloat})::cublasStatus_t + result::CuRef{Cfloat})::cublasStatus_t end @checked function cublasDasum_v2(handle, n, x, incx, result) initialize_context() @gcsafe_ccall libcublas.cublasDasum_v2(handle::cublasHandle_t, n::Cint, x::CuPtr{Cdouble}, incx::Cint, - result::RefOrCuRef{Cdouble})::cublasStatus_t + result::CuRef{Cdouble})::cublasStatus_t end @checked function cublasScasum_v2(handle, n, x, incx, result) initialize_context() @gcsafe_ccall libcublas.cublasScasum_v2(handle::cublasHandle_t, n::Cint, x::CuPtr{cuComplex}, incx::Cint, - result::RefOrCuRef{Cfloat})::cublasStatus_t + result::CuRef{Cfloat})::cublasStatus_t end @checked function cublasDzasum_v2(handle, n, x, incx, result) initialize_context() @gcsafe_ccall libcublas.cublasDzasum_v2(handle::cublasHandle_t, n::Cint, x::CuPtr{cuDoubleComplex}, incx::Cint, - result::RefOrCuRef{Cdouble})::cublasStatus_t + result::CuRef{Cdouble})::cublasStatus_t end 
@checked function cublasSrot_v2(handle, n, x, incx, y, incy, c, s) initialize_context() @gcsafe_ccall libcublas.cublasSrot_v2(handle::cublasHandle_t, n::Cint, x::CuPtr{Cfloat}, incx::Cint, y::CuPtr{Cfloat}, incy::Cint, - c::RefOrCuRef{Cfloat}, - s::RefOrCuRef{Cfloat})::cublasStatus_t + c::CuRef{Cfloat}, + s::CuRef{Cfloat})::cublasStatus_t end @checked function cublasDrot_v2(handle, n, x, incx, y, incy, c, s) initialize_context() @gcsafe_ccall libcublas.cublasDrot_v2(handle::cublasHandle_t, n::Cint, x::CuPtr{Cdouble}, incx::Cint, y::CuPtr{Cdouble}, - incy::Cint, c::RefOrCuRef{Cdouble}, - s::RefOrCuRef{Cdouble})::cublasStatus_t + incy::Cint, c::CuRef{Cdouble}, + s::CuRef{Cdouble})::cublasStatus_t end @checked function cublasCrot_v2(handle, n, x, incx, y, incy, c, s) @@ -411,8 +411,8 @@ end @gcsafe_ccall libcublas.cublasCrot_v2(handle::cublasHandle_t, n::Cint, x::CuPtr{cuComplex}, incx::Cint, y::CuPtr{cuComplex}, incy::Cint, - c::RefOrCuRef{Cfloat}, - s::RefOrCuRef{cuComplex})::cublasStatus_t + c::CuRef{Cfloat}, + s::CuRef{cuComplex})::cublasStatus_t end @checked function cublasCsrot_v2(handle, n, x, incx, y, incy, c, s) @@ -420,8 +420,8 @@ end @gcsafe_ccall libcublas.cublasCsrot_v2(handle::cublasHandle_t, n::Cint, x::CuPtr{cuComplex}, incx::Cint, y::CuPtr{cuComplex}, incy::Cint, - c::RefOrCuRef{Cfloat}, - s::RefOrCuRef{Cfloat})::cublasStatus_t + c::CuRef{Cfloat}, + s::CuRef{Cfloat})::cublasStatus_t end @checked function cublasZrot_v2(handle, n, x, incx, y, incy, c, s) @@ -429,45 +429,45 @@ end @gcsafe_ccall libcublas.cublasZrot_v2(handle::cublasHandle_t, n::Cint, x::CuPtr{cuDoubleComplex}, incx::Cint, y::CuPtr{cuDoubleComplex}, incy::Cint, - c::RefOrCuRef{Cdouble}, - s::RefOrCuRef{cuDoubleComplex})::cublasStatus_t + c::CuRef{Cdouble}, + s::CuRef{cuDoubleComplex})::cublasStatus_t end @checked function cublasZdrot_v2(handle, n, x, incx, y, incy, c, s) initialize_context() @gcsafe_ccall libcublas.cublasZdrot_v2(handle::cublasHandle_t, n::Cint, - 
x::CuPtr{cuDoubleComplex}, incx::Cint, - y::CuPtr{cuDoubleComplex}, incy::Cint, - c::RefOrCuRef{Cdouble}, - s::RefOrCuRef{Cdouble})::cublasStatus_t + x::CuPtr{cuDoubleComplex}, incx::Cint, + y::CuPtr{cuDoubleComplex}, incy::Cint, + c::CuRef{Cdouble}, + s::CuRef{Cdouble})::cublasStatus_t end @checked function cublasSrotg_v2(handle, a, b, c, s) initialize_context() - @gcsafe_ccall libcublas.cublasSrotg_v2(handle::cublasHandle_t, a::RefOrCuRef{Cfloat}, - b::RefOrCuRef{Cfloat}, c::PtrOrCuPtr{Cfloat}, - s::PtrOrCuPtr{Cfloat})::cublasStatus_t + @gcsafe_ccall libcublas.cublasSrotg_v2(handle::cublasHandle_t, a::CuRef{Cfloat}, + b::CuRef{Cfloat}, c::CuRef{Cfloat}, + s::CuRef{Cfloat})::cublasStatus_t end @checked function cublasDrotg_v2(handle, a, b, c, s) initialize_context() - @gcsafe_ccall libcublas.cublasDrotg_v2(handle::cublasHandle_t, a::RefOrCuRef{Cdouble}, - b::RefOrCuRef{Cdouble}, c::PtrOrCuPtr{Cdouble}, + @gcsafe_ccall libcublas.cublasDrotg_v2(handle::cublasHandle_t, a::CuRef{Cdouble}, + b::CuRef{Cdouble}, c::PtrOrCuPtr{Cdouble}, s::PtrOrCuPtr{Cdouble})::cublasStatus_t end @checked function cublasCrotg_v2(handle, a, b, c, s) initialize_context() - @gcsafe_ccall libcublas.cublasCrotg_v2(handle::cublasHandle_t, a::RefOrCuRef{cuComplex}, - b::RefOrCuRef{cuComplex}, c::PtrOrCuPtr{Cfloat}, + @gcsafe_ccall libcublas.cublasCrotg_v2(handle::cublasHandle_t, a::CuRef{cuComplex}, + b::CuRef{cuComplex}, c::PtrOrCuPtr{Cfloat}, s::PtrOrCuPtr{cuComplex})::cublasStatus_t end @checked function cublasZrotg_v2(handle, a, b, c, s) initialize_context() @gcsafe_ccall libcublas.cublasZrotg_v2(handle::cublasHandle_t, - a::RefOrCuRef{cuDoubleComplex}, - b::RefOrCuRef{cuDoubleComplex}, + a::CuRef{cuDoubleComplex}, + b::CuRef{cuDoubleComplex}, c::PtrOrCuPtr{Cdouble}, s::PtrOrCuPtr{cuDoubleComplex})::cublasStatus_t end @@ -490,18 +490,18 @@ end @checked function cublasSrotmg_v2(handle, d1, d2, x1, y1, param) initialize_context() - @gcsafe_ccall 
libcublas.cublasSrotmg_v2(handle::cublasHandle_t, d1::RefOrCuRef{Cfloat}, - d2::RefOrCuRef{Cfloat}, x1::RefOrCuRef{Cfloat}, - y1::RefOrCuRef{Cfloat}, + @gcsafe_ccall libcublas.cublasSrotmg_v2(handle::cublasHandle_t, d1::CuRef{Cfloat}, + d2::CuRef{Cfloat}, x1::CuRef{Cfloat}, + y1::CuRef{Cfloat}, param::PtrOrCuPtr{Cfloat})::cublasStatus_t end @checked function cublasDrotmg_v2(handle, d1, d2, x1, y1, param) initialize_context() - @gcsafe_ccall libcublas.cublasDrotmg_v2(handle::cublasHandle_t, d1::RefOrCuRef{Cdouble}, - d2::RefOrCuRef{Cdouble}, - x1::RefOrCuRef{Cdouble}, - y1::RefOrCuRef{Cdouble}, + @gcsafe_ccall libcublas.cublasDrotmg_v2(handle::cublasHandle_t, d1::CuRef{Cdouble}, + d2::CuRef{Cdouble}, + x1::CuRef{Cdouble}, + y1::CuRef{Cdouble}, param::PtrOrCuPtr{Cdouble})::cublasStatus_t end @@ -516,28 +516,28 @@ end @checked function cublasSgemv_v2(handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy) initialize_context() @gcsafe_ccall libcublas.cublasSgemv_v2(handle::cublasHandle_t, trans::cublasOperation_t, - m::Cint, n::Cint, alpha::RefOrCuRef{Cfloat}, + m::Cint, n::Cint, alpha::CuRef{Cfloat}, A::CuPtr{Cfloat}, lda::Cint, x::CuPtr{Cfloat}, - incx::Cint, beta::RefOrCuRef{Cfloat}, + incx::Cint, beta::CuRef{Cfloat}, y::CuPtr{Cfloat}, incy::Cint)::cublasStatus_t end @checked function cublasDgemv_v2(handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy) initialize_context() @gcsafe_ccall libcublas.cublasDgemv_v2(handle::cublasHandle_t, trans::cublasOperation_t, - m::Cint, n::Cint, alpha::RefOrCuRef{Cdouble}, + m::Cint, n::Cint, alpha::CuRef{Cdouble}, A::CuPtr{Cdouble}, lda::Cint, x::CuPtr{Cdouble}, - incx::Cint, beta::RefOrCuRef{Cdouble}, + incx::Cint, beta::CuRef{Cdouble}, y::CuPtr{Cdouble}, incy::Cint)::cublasStatus_t end @checked function cublasCgemv_v2(handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy) initialize_context() @gcsafe_ccall libcublas.cublasCgemv_v2(handle::cublasHandle_t, trans::cublasOperation_t, - m::Cint, n::Cint, 
alpha::RefOrCuRef{cuComplex}, + m::Cint, n::Cint, alpha::CuRef{cuComplex}, A::CuPtr{cuComplex}, lda::Cint, x::CuPtr{cuComplex}, incx::Cint, - beta::RefOrCuRef{cuComplex}, y::CuPtr{cuComplex}, + beta::CuRef{cuComplex}, y::CuPtr{cuComplex}, incy::Cint)::cublasStatus_t end @@ -545,10 +545,10 @@ end initialize_context() @gcsafe_ccall libcublas.cublasZgemv_v2(handle::cublasHandle_t, trans::cublasOperation_t, m::Cint, n::Cint, - alpha::RefOrCuRef{cuDoubleComplex}, + alpha::CuRef{cuDoubleComplex}, A::CuPtr{cuDoubleComplex}, lda::Cint, x::CuPtr{cuDoubleComplex}, incx::Cint, - beta::RefOrCuRef{cuDoubleComplex}, + beta::CuRef{cuDoubleComplex}, y::CuPtr{cuDoubleComplex}, incy::Cint)::cublasStatus_t end @@ -558,9 +558,9 @@ end initialize_context() @gcsafe_ccall libcublas.cublasSgbmv_v2(handle::cublasHandle_t, trans::cublasOperation_t, m::Cint, n::Cint, kl::Cint, ku::Cint, - alpha::RefOrCuRef{Cfloat}, A::CuPtr{Cfloat}, + alpha::CuRef{Cfloat}, A::CuPtr{Cfloat}, lda::Cint, x::CuPtr{Cfloat}, incx::Cint, - beta::RefOrCuRef{Cfloat}, y::CuPtr{Cfloat}, + beta::CuRef{Cfloat}, y::CuPtr{Cfloat}, incy::Cint)::cublasStatus_t end @@ -569,9 +569,9 @@ end initialize_context() @gcsafe_ccall libcublas.cublasDgbmv_v2(handle::cublasHandle_t, trans::cublasOperation_t, m::Cint, n::Cint, kl::Cint, ku::Cint, - alpha::RefOrCuRef{Cdouble}, A::CuPtr{Cdouble}, + alpha::CuRef{Cdouble}, A::CuPtr{Cdouble}, lda::Cint, x::CuPtr{Cdouble}, incx::Cint, - beta::RefOrCuRef{Cdouble}, y::CuPtr{Cdouble}, + beta::CuRef{Cdouble}, y::CuPtr{Cdouble}, incy::Cint)::cublasStatus_t end @@ -580,10 +580,10 @@ end initialize_context() @gcsafe_ccall libcublas.cublasCgbmv_v2(handle::cublasHandle_t, trans::cublasOperation_t, m::Cint, n::Cint, kl::Cint, ku::Cint, - alpha::RefOrCuRef{cuComplex}, + alpha::CuRef{cuComplex}, A::CuPtr{cuComplex}, lda::Cint, x::CuPtr{cuComplex}, incx::Cint, - beta::RefOrCuRef{cuComplex}, y::CuPtr{cuComplex}, + beta::CuRef{cuComplex}, y::CuPtr{cuComplex}, incy::Cint)::cublasStatus_t end @@ -592,10 +592,10 
@@ end initialize_context() @gcsafe_ccall libcublas.cublasZgbmv_v2(handle::cublasHandle_t, trans::cublasOperation_t, m::Cint, n::Cint, kl::Cint, ku::Cint, - alpha::RefOrCuRef{cuDoubleComplex}, + alpha::CuRef{cuDoubleComplex}, A::CuPtr{cuDoubleComplex}, lda::Cint, x::CuPtr{cuDoubleComplex}, incx::Cint, - beta::RefOrCuRef{cuDoubleComplex}, + beta::CuRef{cuDoubleComplex}, y::CuPtr{cuDoubleComplex}, incy::Cint)::cublasStatus_t end @@ -812,38 +812,38 @@ end @checked function cublasSsymv_v2(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy) initialize_context() @gcsafe_ccall libcublas.cublasSsymv_v2(handle::cublasHandle_t, uplo::cublasFillMode_t, - n::Cint, alpha::RefOrCuRef{Cfloat}, + n::Cint, alpha::CuRef{Cfloat}, A::CuPtr{Cfloat}, lda::Cint, x::CuPtr{Cfloat}, - incx::Cint, beta::RefOrCuRef{Cfloat}, + incx::Cint, beta::CuRef{Cfloat}, y::CuPtr{Cfloat}, incy::Cint)::cublasStatus_t end @checked function cublasDsymv_v2(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy) initialize_context() @gcsafe_ccall libcublas.cublasDsymv_v2(handle::cublasHandle_t, uplo::cublasFillMode_t, - n::Cint, alpha::RefOrCuRef{Cdouble}, + n::Cint, alpha::CuRef{Cdouble}, A::CuPtr{Cdouble}, lda::Cint, x::CuPtr{Cdouble}, - incx::Cint, beta::RefOrCuRef{Cdouble}, + incx::Cint, beta::CuRef{Cdouble}, y::CuPtr{Cdouble}, incy::Cint)::cublasStatus_t end @checked function cublasCsymv_v2(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy) initialize_context() @gcsafe_ccall libcublas.cublasCsymv_v2(handle::cublasHandle_t, uplo::cublasFillMode_t, - n::Cint, alpha::RefOrCuRef{cuComplex}, + n::Cint, alpha::CuRef{cuComplex}, A::CuPtr{cuComplex}, lda::Cint, x::CuPtr{cuComplex}, incx::Cint, - beta::RefOrCuRef{cuComplex}, y::CuPtr{cuComplex}, + beta::CuRef{cuComplex}, y::CuPtr{cuComplex}, incy::Cint)::cublasStatus_t end @checked function cublasZsymv_v2(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy) initialize_context() @gcsafe_ccall libcublas.cublasZsymv_v2(handle::cublasHandle_t, 
uplo::cublasFillMode_t, - n::Cint, alpha::RefOrCuRef{cuDoubleComplex}, + n::Cint, alpha::CuRef{cuDoubleComplex}, A::CuPtr{cuDoubleComplex}, lda::Cint, x::CuPtr{cuDoubleComplex}, incx::Cint, - beta::RefOrCuRef{cuDoubleComplex}, + beta::CuRef{cuDoubleComplex}, y::CuPtr{cuDoubleComplex}, incy::Cint)::cublasStatus_t end @@ -851,20 +851,20 @@ end @checked function cublasChemv_v2(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy) initialize_context() @gcsafe_ccall libcublas.cublasChemv_v2(handle::cublasHandle_t, uplo::cublasFillMode_t, - n::Cint, alpha::RefOrCuRef{cuComplex}, + n::Cint, alpha::CuRef{cuComplex}, A::CuPtr{cuComplex}, lda::Cint, x::CuPtr{cuComplex}, incx::Cint, - beta::RefOrCuRef{cuComplex}, y::CuPtr{cuComplex}, + beta::CuRef{cuComplex}, y::CuPtr{cuComplex}, incy::Cint)::cublasStatus_t end @checked function cublasZhemv_v2(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy) initialize_context() @gcsafe_ccall libcublas.cublasZhemv_v2(handle::cublasHandle_t, uplo::cublasFillMode_t, - n::Cint, alpha::RefOrCuRef{cuDoubleComplex}, + n::Cint, alpha::CuRef{cuDoubleComplex}, A::CuPtr{cuDoubleComplex}, lda::Cint, x::CuPtr{cuDoubleComplex}, incx::Cint, - beta::RefOrCuRef{cuDoubleComplex}, + beta::CuRef{cuDoubleComplex}, y::CuPtr{cuDoubleComplex}, incy::Cint)::cublasStatus_t end @@ -872,28 +872,28 @@ end @checked function cublasSsbmv_v2(handle, uplo, n, k, alpha, A, lda, x, incx, beta, y, incy) initialize_context() @gcsafe_ccall libcublas.cublasSsbmv_v2(handle::cublasHandle_t, uplo::cublasFillMode_t, - n::Cint, k::Cint, alpha::RefOrCuRef{Cfloat}, + n::Cint, k::Cint, alpha::CuRef{Cfloat}, A::CuPtr{Cfloat}, lda::Cint, x::CuPtr{Cfloat}, - incx::Cint, beta::RefOrCuRef{Cfloat}, + incx::Cint, beta::CuRef{Cfloat}, y::CuPtr{Cfloat}, incy::Cint)::cublasStatus_t end @checked function cublasDsbmv_v2(handle, uplo, n, k, alpha, A, lda, x, incx, beta, y, incy) initialize_context() @gcsafe_ccall libcublas.cublasDsbmv_v2(handle::cublasHandle_t, uplo::cublasFillMode_t, - 
n::Cint, k::Cint, alpha::RefOrCuRef{Cdouble}, + n::Cint, k::Cint, alpha::CuRef{Cdouble}, A::CuPtr{Cdouble}, lda::Cint, x::CuPtr{Cdouble}, - incx::Cint, beta::RefOrCuRef{Cdouble}, + incx::Cint, beta::CuRef{Cdouble}, y::CuPtr{Cdouble}, incy::Cint)::cublasStatus_t end @checked function cublasChbmv_v2(handle, uplo, n, k, alpha, A, lda, x, incx, beta, y, incy) initialize_context() @gcsafe_ccall libcublas.cublasChbmv_v2(handle::cublasHandle_t, uplo::cublasFillMode_t, - n::Cint, k::Cint, alpha::RefOrCuRef{cuComplex}, + n::Cint, k::Cint, alpha::CuRef{cuComplex}, A::CuPtr{cuComplex}, lda::Cint, x::CuPtr{cuComplex}, incx::Cint, - beta::RefOrCuRef{cuComplex}, y::CuPtr{cuComplex}, + beta::CuRef{cuComplex}, y::CuPtr{cuComplex}, incy::Cint)::cublasStatus_t end @@ -901,10 +901,10 @@ end initialize_context() @gcsafe_ccall libcublas.cublasZhbmv_v2(handle::cublasHandle_t, uplo::cublasFillMode_t, n::Cint, k::Cint, - alpha::RefOrCuRef{cuDoubleComplex}, + alpha::CuRef{cuDoubleComplex}, A::CuPtr{cuDoubleComplex}, lda::Cint, x::CuPtr{cuDoubleComplex}, incx::Cint, - beta::RefOrCuRef{cuDoubleComplex}, + beta::CuRef{cuDoubleComplex}, y::CuPtr{cuDoubleComplex}, incy::Cint)::cublasStatus_t end @@ -912,37 +912,37 @@ end @checked function cublasSspmv_v2(handle, uplo, n, alpha, AP, x, incx, beta, y, incy) initialize_context() @gcsafe_ccall libcublas.cublasSspmv_v2(handle::cublasHandle_t, uplo::cublasFillMode_t, - n::Cint, alpha::RefOrCuRef{Cfloat}, + n::Cint, alpha::CuRef{Cfloat}, AP::CuPtr{Cfloat}, x::CuPtr{Cfloat}, incx::Cint, - beta::RefOrCuRef{Cfloat}, y::CuPtr{Cfloat}, + beta::CuRef{Cfloat}, y::CuPtr{Cfloat}, incy::Cint)::cublasStatus_t end @checked function cublasDspmv_v2(handle, uplo, n, alpha, AP, x, incx, beta, y, incy) initialize_context() @gcsafe_ccall libcublas.cublasDspmv_v2(handle::cublasHandle_t, uplo::cublasFillMode_t, - n::Cint, alpha::RefOrCuRef{Cdouble}, + n::Cint, alpha::CuRef{Cdouble}, AP::CuPtr{Cdouble}, x::CuPtr{Cdouble}, - incx::Cint, beta::RefOrCuRef{Cdouble}, + 
incx::Cint, beta::CuRef{Cdouble}, y::CuPtr{Cdouble}, incy::Cint)::cublasStatus_t end @checked function cublasChpmv_v2(handle, uplo, n, alpha, AP, x, incx, beta, y, incy) initialize_context() @gcsafe_ccall libcublas.cublasChpmv_v2(handle::cublasHandle_t, uplo::cublasFillMode_t, - n::Cint, alpha::RefOrCuRef{cuComplex}, + n::Cint, alpha::CuRef{cuComplex}, AP::CuPtr{cuComplex}, x::CuPtr{cuComplex}, - incx::Cint, beta::RefOrCuRef{cuComplex}, + incx::Cint, beta::CuRef{cuComplex}, y::CuPtr{cuComplex}, incy::Cint)::cublasStatus_t end @checked function cublasZhpmv_v2(handle, uplo, n, alpha, AP, x, incx, beta, y, incy) initialize_context() @gcsafe_ccall libcublas.cublasZhpmv_v2(handle::cublasHandle_t, uplo::cublasFillMode_t, - n::Cint, alpha::RefOrCuRef{cuDoubleComplex}, + n::Cint, alpha::CuRef{cuDoubleComplex}, AP::CuPtr{cuDoubleComplex}, x::CuPtr{cuDoubleComplex}, incx::Cint, - beta::RefOrCuRef{cuDoubleComplex}, + beta::CuRef{cuDoubleComplex}, y::CuPtr{cuDoubleComplex}, incy::Cint)::cublasStatus_t end @@ -950,7 +950,7 @@ end @checked function cublasSger_v2(handle, m, n, alpha, x, incx, y, incy, A, lda) initialize_context() @gcsafe_ccall libcublas.cublasSger_v2(handle::cublasHandle_t, m::Cint, n::Cint, - alpha::RefOrCuRef{Cfloat}, x::CuPtr{Cfloat}, + alpha::CuRef{Cfloat}, x::CuPtr{Cfloat}, incx::Cint, y::CuPtr{Cfloat}, incy::Cint, A::CuPtr{Cfloat}, lda::Cint)::cublasStatus_t end @@ -958,7 +958,7 @@ end @checked function cublasDger_v2(handle, m, n, alpha, x, incx, y, incy, A, lda) initialize_context() @gcsafe_ccall libcublas.cublasDger_v2(handle::cublasHandle_t, m::Cint, n::Cint, - alpha::RefOrCuRef{Cdouble}, x::CuPtr{Cdouble}, + alpha::CuRef{Cdouble}, x::CuPtr{Cdouble}, incx::Cint, y::CuPtr{Cdouble}, incy::Cint, A::CuPtr{Cdouble}, lda::Cint)::cublasStatus_t end @@ -966,7 +966,7 @@ end @checked function cublasCgeru_v2(handle, m, n, alpha, x, incx, y, incy, A, lda) initialize_context() @gcsafe_ccall libcublas.cublasCgeru_v2(handle::cublasHandle_t, m::Cint, n::Cint, - 
alpha::RefOrCuRef{cuComplex}, + alpha::CuRef{cuComplex}, x::CuPtr{cuComplex}, incx::Cint, y::CuPtr{cuComplex}, incy::Cint, A::CuPtr{cuComplex}, lda::Cint)::cublasStatus_t @@ -975,7 +975,7 @@ end @checked function cublasCgerc_v2(handle, m, n, alpha, x, incx, y, incy, A, lda) initialize_context() @gcsafe_ccall libcublas.cublasCgerc_v2(handle::cublasHandle_t, m::Cint, n::Cint, - alpha::RefOrCuRef{cuComplex}, + alpha::CuRef{cuComplex}, x::CuPtr{cuComplex}, incx::Cint, y::CuPtr{cuComplex}, incy::Cint, A::CuPtr{cuComplex}, lda::Cint)::cublasStatus_t @@ -984,7 +984,7 @@ end @checked function cublasZgeru_v2(handle, m, n, alpha, x, incx, y, incy, A, lda) initialize_context() @gcsafe_ccall libcublas.cublasZgeru_v2(handle::cublasHandle_t, m::Cint, n::Cint, - alpha::RefOrCuRef{cuDoubleComplex}, + alpha::CuRef{cuDoubleComplex}, x::CuPtr{cuDoubleComplex}, incx::Cint, y::CuPtr{cuDoubleComplex}, incy::Cint, A::CuPtr{cuDoubleComplex}, @@ -994,7 +994,7 @@ end @checked function cublasZgerc_v2(handle, m, n, alpha, x, incx, y, incy, A, lda) initialize_context() @gcsafe_ccall libcublas.cublasZgerc_v2(handle::cublasHandle_t, m::Cint, n::Cint, - alpha::RefOrCuRef{cuDoubleComplex}, + alpha::CuRef{cuDoubleComplex}, x::CuPtr{cuDoubleComplex}, incx::Cint, y::CuPtr{cuDoubleComplex}, incy::Cint, A::CuPtr{cuDoubleComplex}, @@ -1004,7 +1004,7 @@ end @checked function cublasSsyr_v2(handle, uplo, n, alpha, x, incx, A, lda) initialize_context() @gcsafe_ccall libcublas.cublasSsyr_v2(handle::cublasHandle_t, uplo::cublasFillMode_t, - n::Cint, alpha::RefOrCuRef{Cfloat}, + n::Cint, alpha::CuRef{Cfloat}, x::CuPtr{Cfloat}, incx::Cint, A::CuPtr{Cfloat}, lda::Cint)::cublasStatus_t end @@ -1012,7 +1012,7 @@ end @checked function cublasDsyr_v2(handle, uplo, n, alpha, x, incx, A, lda) initialize_context() @gcsafe_ccall libcublas.cublasDsyr_v2(handle::cublasHandle_t, uplo::cublasFillMode_t, - n::Cint, alpha::RefOrCuRef{Cdouble}, + n::Cint, alpha::CuRef{Cdouble}, x::CuPtr{Cdouble}, incx::Cint, A::CuPtr{Cdouble}, 
lda::Cint)::cublasStatus_t end @@ -1020,7 +1020,7 @@ end @checked function cublasCsyr_v2(handle, uplo, n, alpha, x, incx, A, lda) initialize_context() @gcsafe_ccall libcublas.cublasCsyr_v2(handle::cublasHandle_t, uplo::cublasFillMode_t, - n::Cint, alpha::RefOrCuRef{cuComplex}, + n::Cint, alpha::CuRef{cuComplex}, x::CuPtr{cuComplex}, incx::Cint, A::CuPtr{cuComplex}, lda::Cint)::cublasStatus_t end @@ -1028,7 +1028,7 @@ end @checked function cublasZsyr_v2(handle, uplo, n, alpha, x, incx, A, lda) initialize_context() @gcsafe_ccall libcublas.cublasZsyr_v2(handle::cublasHandle_t, uplo::cublasFillMode_t, - n::Cint, alpha::RefOrCuRef{cuDoubleComplex}, + n::Cint, alpha::CuRef{cuDoubleComplex}, x::CuPtr{cuDoubleComplex}, incx::Cint, A::CuPtr{cuDoubleComplex}, lda::Cint)::cublasStatus_t @@ -1037,7 +1037,7 @@ end @checked function cublasCher_v2(handle, uplo, n, alpha, x, incx, A, lda) initialize_context() @gcsafe_ccall libcublas.cublasCher_v2(handle::cublasHandle_t, uplo::cublasFillMode_t, - n::Cint, alpha::RefOrCuRef{Cfloat}, + n::Cint, alpha::CuRef{Cfloat}, x::CuPtr{cuComplex}, incx::Cint, A::CuPtr{cuComplex}, lda::Cint)::cublasStatus_t end @@ -1045,7 +1045,7 @@ end @checked function cublasZher_v2(handle, uplo, n, alpha, x, incx, A, lda) initialize_context() @gcsafe_ccall libcublas.cublasZher_v2(handle::cublasHandle_t, uplo::cublasFillMode_t, - n::Cint, alpha::RefOrCuRef{Cdouble}, + n::Cint, alpha::CuRef{Cdouble}, x::CuPtr{cuDoubleComplex}, incx::Cint, A::CuPtr{cuDoubleComplex}, lda::Cint)::cublasStatus_t @@ -1054,7 +1054,7 @@ end @checked function cublasSspr_v2(handle, uplo, n, alpha, x, incx, AP) initialize_context() @gcsafe_ccall libcublas.cublasSspr_v2(handle::cublasHandle_t, uplo::cublasFillMode_t, - n::Cint, alpha::RefOrCuRef{Cfloat}, + n::Cint, alpha::CuRef{Cfloat}, x::CuPtr{Cfloat}, incx::Cint, AP::CuPtr{Cfloat})::cublasStatus_t end @@ -1062,7 +1062,7 @@ end @checked function cublasDspr_v2(handle, uplo, n, alpha, x, incx, AP) initialize_context() @gcsafe_ccall 
libcublas.cublasDspr_v2(handle::cublasHandle_t, uplo::cublasFillMode_t, - n::Cint, alpha::RefOrCuRef{Cdouble}, + n::Cint, alpha::CuRef{Cdouble}, x::CuPtr{Cdouble}, incx::Cint, AP::CuPtr{Cdouble})::cublasStatus_t end @@ -1070,7 +1070,7 @@ end @checked function cublasChpr_v2(handle, uplo, n, alpha, x, incx, AP) initialize_context() @gcsafe_ccall libcublas.cublasChpr_v2(handle::cublasHandle_t, uplo::cublasFillMode_t, - n::Cint, alpha::RefOrCuRef{Cfloat}, + n::Cint, alpha::CuRef{Cfloat}, x::CuPtr{cuComplex}, incx::Cint, AP::CuPtr{cuComplex})::cublasStatus_t end @@ -1078,7 +1078,7 @@ end @checked function cublasZhpr_v2(handle, uplo, n, alpha, x, incx, AP) initialize_context() @gcsafe_ccall libcublas.cublasZhpr_v2(handle::cublasHandle_t, uplo::cublasFillMode_t, - n::Cint, alpha::RefOrCuRef{Cdouble}, + n::Cint, alpha::CuRef{Cdouble}, x::CuPtr{cuDoubleComplex}, incx::Cint, AP::CuPtr{cuDoubleComplex})::cublasStatus_t end @@ -1086,7 +1086,7 @@ end @checked function cublasSsyr2_v2(handle, uplo, n, alpha, x, incx, y, incy, A, lda) initialize_context() @gcsafe_ccall libcublas.cublasSsyr2_v2(handle::cublasHandle_t, uplo::cublasFillMode_t, - n::Cint, alpha::RefOrCuRef{Cfloat}, + n::Cint, alpha::CuRef{Cfloat}, x::CuPtr{Cfloat}, incx::Cint, y::CuPtr{Cfloat}, incy::Cint, A::CuPtr{Cfloat}, lda::Cint)::cublasStatus_t @@ -1095,7 +1095,7 @@ end @checked function cublasDsyr2_v2(handle, uplo, n, alpha, x, incx, y, incy, A, lda) initialize_context() @gcsafe_ccall libcublas.cublasDsyr2_v2(handle::cublasHandle_t, uplo::cublasFillMode_t, - n::Cint, alpha::RefOrCuRef{Cdouble}, + n::Cint, alpha::CuRef{Cdouble}, x::CuPtr{Cdouble}, incx::Cint, y::CuPtr{Cdouble}, incy::Cint, A::CuPtr{Cdouble}, lda::Cint)::cublasStatus_t @@ -1104,7 +1104,7 @@ end @checked function cublasCsyr2_v2(handle, uplo, n, alpha, x, incx, y, incy, A, lda) initialize_context() @gcsafe_ccall libcublas.cublasCsyr2_v2(handle::cublasHandle_t, uplo::cublasFillMode_t, - n::Cint, alpha::RefOrCuRef{cuComplex}, + n::Cint, 
alpha::CuRef{cuComplex}, x::CuPtr{cuComplex}, incx::Cint, y::CuPtr{cuComplex}, incy::Cint, A::CuPtr{cuComplex}, lda::Cint)::cublasStatus_t @@ -1113,7 +1113,7 @@ end @checked function cublasZsyr2_v2(handle, uplo, n, alpha, x, incx, y, incy, A, lda) initialize_context() @gcsafe_ccall libcublas.cublasZsyr2_v2(handle::cublasHandle_t, uplo::cublasFillMode_t, - n::Cint, alpha::RefOrCuRef{cuDoubleComplex}, + n::Cint, alpha::CuRef{cuDoubleComplex}, x::CuPtr{cuDoubleComplex}, incx::Cint, y::CuPtr{cuDoubleComplex}, incy::Cint, A::CuPtr{cuDoubleComplex}, @@ -1123,7 +1123,7 @@ end @checked function cublasCher2_v2(handle, uplo, n, alpha, x, incx, y, incy, A, lda) initialize_context() @gcsafe_ccall libcublas.cublasCher2_v2(handle::cublasHandle_t, uplo::cublasFillMode_t, - n::Cint, alpha::RefOrCuRef{cuComplex}, + n::Cint, alpha::CuRef{cuComplex}, x::CuPtr{cuComplex}, incx::Cint, y::CuPtr{cuComplex}, incy::Cint, A::CuPtr{cuComplex}, lda::Cint)::cublasStatus_t @@ -1132,7 +1132,7 @@ end @checked function cublasZher2_v2(handle, uplo, n, alpha, x, incx, y, incy, A, lda) initialize_context() @gcsafe_ccall libcublas.cublasZher2_v2(handle::cublasHandle_t, uplo::cublasFillMode_t, - n::Cint, alpha::RefOrCuRef{cuDoubleComplex}, + n::Cint, alpha::CuRef{cuDoubleComplex}, x::CuPtr{cuDoubleComplex}, incx::Cint, y::CuPtr{cuDoubleComplex}, incy::Cint, A::CuPtr{cuDoubleComplex}, @@ -1142,7 +1142,7 @@ end @checked function cublasSspr2_v2(handle, uplo, n, alpha, x, incx, y, incy, AP) initialize_context() @gcsafe_ccall libcublas.cublasSspr2_v2(handle::cublasHandle_t, uplo::cublasFillMode_t, - n::Cint, alpha::RefOrCuRef{Cfloat}, + n::Cint, alpha::CuRef{Cfloat}, x::CuPtr{Cfloat}, incx::Cint, y::CuPtr{Cfloat}, incy::Cint, AP::CuPtr{Cfloat})::cublasStatus_t end @@ -1150,7 +1150,7 @@ end @checked function cublasDspr2_v2(handle, uplo, n, alpha, x, incx, y, incy, AP) initialize_context() @gcsafe_ccall libcublas.cublasDspr2_v2(handle::cublasHandle_t, uplo::cublasFillMode_t, - n::Cint, 
alpha::RefOrCuRef{Cdouble}, + n::Cint, alpha::CuRef{Cdouble}, x::CuPtr{Cdouble}, incx::Cint, y::CuPtr{Cdouble}, incy::Cint, AP::CuPtr{Cdouble})::cublasStatus_t end @@ -1158,7 +1158,7 @@ end @checked function cublasChpr2_v2(handle, uplo, n, alpha, x, incx, y, incy, AP) initialize_context() @gcsafe_ccall libcublas.cublasChpr2_v2(handle::cublasHandle_t, uplo::cublasFillMode_t, - n::Cint, alpha::RefOrCuRef{cuComplex}, + n::Cint, alpha::CuRef{cuComplex}, x::CuPtr{cuComplex}, incx::Cint, y::CuPtr{cuComplex}, incy::Cint, AP::CuPtr{cuComplex})::cublasStatus_t @@ -1167,7 +1167,7 @@ end @checked function cublasZhpr2_v2(handle, uplo, n, alpha, x, incx, y, incy, AP) initialize_context() @gcsafe_ccall libcublas.cublasZhpr2_v2(handle::cublasHandle_t, uplo::cublasFillMode_t, - n::Cint, alpha::RefOrCuRef{cuDoubleComplex}, + n::Cint, alpha::CuRef{cuDoubleComplex}, x::CuPtr{cuDoubleComplex}, incx::Cint, y::CuPtr{cuDoubleComplex}, incy::Cint, AP::CuPtr{cuDoubleComplex})::cublasStatus_t @@ -1179,9 +1179,9 @@ end @gcsafe_ccall libcublas.cublasSgemm_v2(handle::cublasHandle_t, transa::cublasOperation_t, transb::cublasOperation_t, m::Cint, n::Cint, - k::Cint, alpha::RefOrCuRef{Cfloat}, + k::Cint, alpha::CuRef{Cfloat}, A::CuPtr{Cfloat}, lda::Cint, B::CuPtr{Cfloat}, - ldb::Cint, beta::RefOrCuRef{Cfloat}, + ldb::Cint, beta::CuRef{Cfloat}, C::CuPtr{Cfloat}, ldc::Cint)::cublasStatus_t end @@ -1191,9 +1191,9 @@ end @gcsafe_ccall libcublas.cublasDgemm_v2(handle::cublasHandle_t, transa::cublasOperation_t, transb::cublasOperation_t, m::Cint, n::Cint, - k::Cint, alpha::RefOrCuRef{Cdouble}, + k::Cint, alpha::CuRef{Cdouble}, A::CuPtr{Cdouble}, lda::Cint, B::CuPtr{Cdouble}, - ldb::Cint, beta::RefOrCuRef{Cdouble}, + ldb::Cint, beta::CuRef{Cdouble}, C::CuPtr{Cdouble}, ldc::Cint)::cublasStatus_t end @@ -1203,10 +1203,10 @@ end @gcsafe_ccall libcublas.cublasCgemm_v2(handle::cublasHandle_t, transa::cublasOperation_t, transb::cublasOperation_t, m::Cint, n::Cint, - k::Cint, alpha::RefOrCuRef{cuComplex}, + 
k::Cint, alpha::CuRef{cuComplex}, A::CuPtr{cuComplex}, lda::Cint, B::CuPtr{cuComplex}, ldb::Cint, - beta::RefOrCuRef{cuComplex}, C::CuPtr{cuComplex}, + beta::CuRef{cuComplex}, C::CuPtr{cuComplex}, ldc::Cint)::cublasStatus_t end @@ -1216,10 +1216,10 @@ end @gcsafe_ccall libcublas.cublasZgemm_v2(handle::cublasHandle_t, transa::cublasOperation_t, transb::cublasOperation_t, m::Cint, n::Cint, - k::Cint, alpha::RefOrCuRef{cuDoubleComplex}, + k::Cint, alpha::CuRef{cuDoubleComplex}, A::CuPtr{cuDoubleComplex}, lda::Cint, B::CuPtr{cuDoubleComplex}, ldb::Cint, - beta::RefOrCuRef{cuDoubleComplex}, + beta::CuRef{cuDoubleComplex}, C::CuPtr{cuDoubleComplex}, ldc::Cint)::cublasStatus_t end @@ -1228,8 +1228,8 @@ end initialize_context() @gcsafe_ccall libcublas.cublasSsyrk_v2(handle::cublasHandle_t, uplo::cublasFillMode_t, trans::cublasOperation_t, n::Cint, k::Cint, - alpha::RefOrCuRef{Cfloat}, A::CuPtr{Cfloat}, - lda::Cint, beta::RefOrCuRef{Cfloat}, + alpha::CuRef{Cfloat}, A::CuPtr{Cfloat}, + lda::Cint, beta::CuRef{Cfloat}, C::CuPtr{Cfloat}, ldc::Cint)::cublasStatus_t end @@ -1237,8 +1237,8 @@ end initialize_context() @gcsafe_ccall libcublas.cublasDsyrk_v2(handle::cublasHandle_t, uplo::cublasFillMode_t, trans::cublasOperation_t, n::Cint, k::Cint, - alpha::RefOrCuRef{Cdouble}, A::CuPtr{Cdouble}, - lda::Cint, beta::RefOrCuRef{Cdouble}, + alpha::CuRef{Cdouble}, A::CuPtr{Cdouble}, + lda::Cint, beta::CuRef{Cdouble}, C::CuPtr{Cdouble}, ldc::Cint)::cublasStatus_t end @@ -1246,9 +1246,9 @@ end initialize_context() @gcsafe_ccall libcublas.cublasCsyrk_v2(handle::cublasHandle_t, uplo::cublasFillMode_t, trans::cublasOperation_t, n::Cint, k::Cint, - alpha::RefOrCuRef{cuComplex}, + alpha::CuRef{cuComplex}, A::CuPtr{cuComplex}, lda::Cint, - beta::RefOrCuRef{cuComplex}, C::CuPtr{cuComplex}, + beta::CuRef{cuComplex}, C::CuPtr{cuComplex}, ldc::Cint)::cublasStatus_t end @@ -1256,9 +1256,9 @@ end initialize_context() @gcsafe_ccall libcublas.cublasZsyrk_v2(handle::cublasHandle_t, 
uplo::cublasFillMode_t, trans::cublasOperation_t, n::Cint, k::Cint, - alpha::RefOrCuRef{cuDoubleComplex}, + alpha::CuRef{cuDoubleComplex}, A::CuPtr{cuDoubleComplex}, lda::Cint, - beta::RefOrCuRef{cuDoubleComplex}, + beta::CuRef{cuDoubleComplex}, C::CuPtr{cuDoubleComplex}, ldc::Cint)::cublasStatus_t end @@ -1267,8 +1267,8 @@ end initialize_context() @gcsafe_ccall libcublas.cublasCherk_v2(handle::cublasHandle_t, uplo::cublasFillMode_t, trans::cublasOperation_t, n::Cint, k::Cint, - alpha::RefOrCuRef{Cfloat}, A::CuPtr{cuComplex}, - lda::Cint, beta::RefOrCuRef{Cfloat}, + alpha::CuRef{Cfloat}, A::CuPtr{cuComplex}, + lda::Cint, beta::CuRef{Cfloat}, C::CuPtr{cuComplex}, ldc::Cint)::cublasStatus_t end @@ -1276,9 +1276,9 @@ end initialize_context() @gcsafe_ccall libcublas.cublasZherk_v2(handle::cublasHandle_t, uplo::cublasFillMode_t, trans::cublasOperation_t, n::Cint, k::Cint, - alpha::RefOrCuRef{Cdouble}, + alpha::CuRef{Cdouble}, A::CuPtr{cuDoubleComplex}, lda::Cint, - beta::RefOrCuRef{Cdouble}, + beta::CuRef{Cdouble}, C::CuPtr{cuDoubleComplex}, ldc::Cint)::cublasStatus_t end @@ -1288,9 +1288,9 @@ end initialize_context() @gcsafe_ccall libcublas.cublasSsyr2k_v2(handle::cublasHandle_t, uplo::cublasFillMode_t, trans::cublasOperation_t, n::Cint, k::Cint, - alpha::RefOrCuRef{Cfloat}, A::CuPtr{Cfloat}, + alpha::CuRef{Cfloat}, A::CuPtr{Cfloat}, lda::Cint, B::CuPtr{Cfloat}, ldb::Cint, - beta::RefOrCuRef{Cfloat}, C::CuPtr{Cfloat}, + beta::CuRef{Cfloat}, C::CuPtr{Cfloat}, ldc::Cint)::cublasStatus_t end @@ -1299,9 +1299,9 @@ end initialize_context() @gcsafe_ccall libcublas.cublasDsyr2k_v2(handle::cublasHandle_t, uplo::cublasFillMode_t, trans::cublasOperation_t, n::Cint, k::Cint, - alpha::RefOrCuRef{Cdouble}, A::CuPtr{Cdouble}, + alpha::CuRef{Cdouble}, A::CuPtr{Cdouble}, lda::Cint, B::CuPtr{Cdouble}, ldb::Cint, - beta::RefOrCuRef{Cdouble}, C::CuPtr{Cdouble}, + beta::CuRef{Cdouble}, C::CuPtr{Cdouble}, ldc::Cint)::cublasStatus_t end @@ -1310,10 +1310,10 @@ end initialize_context() 
@gcsafe_ccall libcublas.cublasCsyr2k_v2(handle::cublasHandle_t, uplo::cublasFillMode_t, trans::cublasOperation_t, n::Cint, k::Cint, - alpha::RefOrCuRef{cuComplex}, + alpha::CuRef{cuComplex}, A::CuPtr{cuComplex}, lda::Cint, B::CuPtr{cuComplex}, ldb::Cint, - beta::RefOrCuRef{cuComplex}, + beta::CuRef{cuComplex}, C::CuPtr{cuComplex}, ldc::Cint)::cublasStatus_t end @@ -1322,10 +1322,10 @@ end initialize_context() @gcsafe_ccall libcublas.cublasZsyr2k_v2(handle::cublasHandle_t, uplo::cublasFillMode_t, trans::cublasOperation_t, n::Cint, k::Cint, - alpha::RefOrCuRef{cuDoubleComplex}, + alpha::CuRef{cuDoubleComplex}, A::CuPtr{cuDoubleComplex}, lda::Cint, B::CuPtr{cuDoubleComplex}, ldb::Cint, - beta::RefOrCuRef{cuDoubleComplex}, + beta::CuRef{cuDoubleComplex}, C::CuPtr{cuDoubleComplex}, ldc::Cint)::cublasStatus_t end @@ -1335,10 +1335,10 @@ end initialize_context() @gcsafe_ccall libcublas.cublasCher2k_v2(handle::cublasHandle_t, uplo::cublasFillMode_t, trans::cublasOperation_t, n::Cint, k::Cint, - alpha::RefOrCuRef{cuComplex}, + alpha::CuRef{cuComplex}, A::CuPtr{cuComplex}, lda::Cint, B::CuPtr{cuComplex}, ldb::Cint, - beta::RefOrCuRef{Cfloat}, C::CuPtr{cuComplex}, + beta::CuRef{Cfloat}, C::CuPtr{cuComplex}, ldc::Cint)::cublasStatus_t end @@ -1347,10 +1347,10 @@ end initialize_context() @gcsafe_ccall libcublas.cublasZher2k_v2(handle::cublasHandle_t, uplo::cublasFillMode_t, trans::cublasOperation_t, n::Cint, k::Cint, - alpha::RefOrCuRef{cuDoubleComplex}, + alpha::CuRef{cuDoubleComplex}, A::CuPtr{cuDoubleComplex}, lda::Cint, B::CuPtr{cuDoubleComplex}, ldb::Cint, - beta::RefOrCuRef{Cdouble}, + beta::CuRef{Cdouble}, C::CuPtr{cuDoubleComplex}, ldc::Cint)::cublasStatus_t end @@ -1365,9 +1365,9 @@ end initialize_context() @gcsafe_ccall libcublas.cublasSsymm_v2(handle::cublasHandle_t, side::cublasSideMode_t, uplo::cublasFillMode_t, m::Cint, n::Cint, - alpha::RefOrCuRef{Cfloat}, A::CuPtr{Cfloat}, + alpha::CuRef{Cfloat}, A::CuPtr{Cfloat}, lda::Cint, B::CuPtr{Cfloat}, ldb::Cint, - 
beta::RefOrCuRef{Cfloat}, C::CuPtr{Cfloat}, + beta::CuRef{Cfloat}, C::CuPtr{Cfloat}, ldc::Cint)::cublasStatus_t end @@ -1376,9 +1376,9 @@ end initialize_context() @gcsafe_ccall libcublas.cublasDsymm_v2(handle::cublasHandle_t, side::cublasSideMode_t, uplo::cublasFillMode_t, m::Cint, n::Cint, - alpha::RefOrCuRef{Cdouble}, A::CuPtr{Cdouble}, + alpha::CuRef{Cdouble}, A::CuPtr{Cdouble}, lda::Cint, B::CuPtr{Cdouble}, ldb::Cint, - beta::RefOrCuRef{Cdouble}, C::CuPtr{Cdouble}, + beta::CuRef{Cdouble}, C::CuPtr{Cdouble}, ldc::Cint)::cublasStatus_t end @@ -1387,10 +1387,10 @@ end initialize_context() @gcsafe_ccall libcublas.cublasCsymm_v2(handle::cublasHandle_t, side::cublasSideMode_t, uplo::cublasFillMode_t, m::Cint, n::Cint, - alpha::RefOrCuRef{cuComplex}, + alpha::CuRef{cuComplex}, A::CuPtr{cuComplex}, lda::Cint, B::CuPtr{cuComplex}, ldb::Cint, - beta::RefOrCuRef{cuComplex}, C::CuPtr{cuComplex}, + beta::CuRef{cuComplex}, C::CuPtr{cuComplex}, ldc::Cint)::cublasStatus_t end @@ -1399,10 +1399,10 @@ end initialize_context() @gcsafe_ccall libcublas.cublasZsymm_v2(handle::cublasHandle_t, side::cublasSideMode_t, uplo::cublasFillMode_t, m::Cint, n::Cint, - alpha::RefOrCuRef{cuDoubleComplex}, + alpha::CuRef{cuDoubleComplex}, A::CuPtr{cuDoubleComplex}, lda::Cint, B::CuPtr{cuDoubleComplex}, ldb::Cint, - beta::RefOrCuRef{cuDoubleComplex}, + beta::CuRef{cuDoubleComplex}, C::CuPtr{cuDoubleComplex}, ldc::Cint)::cublasStatus_t end @@ -1412,10 +1412,10 @@ end initialize_context() @gcsafe_ccall libcublas.cublasChemm_v2(handle::cublasHandle_t, side::cublasSideMode_t, uplo::cublasFillMode_t, m::Cint, n::Cint, - alpha::RefOrCuRef{cuComplex}, + alpha::CuRef{cuComplex}, A::CuPtr{cuComplex}, lda::Cint, B::CuPtr{cuComplex}, ldb::Cint, - beta::RefOrCuRef{cuComplex}, C::CuPtr{cuComplex}, + beta::CuRef{cuComplex}, C::CuPtr{cuComplex}, ldc::Cint)::cublasStatus_t end @@ -1424,10 +1424,10 @@ end initialize_context() @gcsafe_ccall libcublas.cublasZhemm_v2(handle::cublasHandle_t, side::cublasSideMode_t, 
uplo::cublasFillMode_t, m::Cint, n::Cint, - alpha::RefOrCuRef{cuDoubleComplex}, + alpha::CuRef{cuDoubleComplex}, A::CuPtr{cuDoubleComplex}, lda::Cint, B::CuPtr{cuDoubleComplex}, ldb::Cint, - beta::RefOrCuRef{cuDoubleComplex}, + beta::CuRef{cuDoubleComplex}, C::CuPtr{cuDoubleComplex}, ldc::Cint)::cublasStatus_t end @@ -1438,7 +1438,7 @@ end @gcsafe_ccall libcublas.cublasStrsm_v2(handle::cublasHandle_t, side::cublasSideMode_t, uplo::cublasFillMode_t, trans::cublasOperation_t, diag::cublasDiagType_t, m::Cint, n::Cint, - alpha::RefOrCuRef{Cfloat}, A::CuPtr{Cfloat}, + alpha::CuRef{Cfloat}, A::CuPtr{Cfloat}, lda::Cint, B::CuPtr{Cfloat}, ldb::Cint)::cublasStatus_t end @@ -1449,7 +1449,7 @@ end @gcsafe_ccall libcublas.cublasDtrsm_v2(handle::cublasHandle_t, side::cublasSideMode_t, uplo::cublasFillMode_t, trans::cublasOperation_t, diag::cublasDiagType_t, m::Cint, n::Cint, - alpha::RefOrCuRef{Cdouble}, A::CuPtr{Cdouble}, + alpha::CuRef{Cdouble}, A::CuPtr{Cdouble}, lda::Cint, B::CuPtr{Cdouble}, ldb::Cint)::cublasStatus_t end @@ -1460,7 +1460,7 @@ end @gcsafe_ccall libcublas.cublasCtrsm_v2(handle::cublasHandle_t, side::cublasSideMode_t, uplo::cublasFillMode_t, trans::cublasOperation_t, diag::cublasDiagType_t, m::Cint, n::Cint, - alpha::RefOrCuRef{cuComplex}, + alpha::CuRef{cuComplex}, A::CuPtr{cuComplex}, lda::Cint, B::CuPtr{cuComplex}, ldb::Cint)::cublasStatus_t end @@ -1471,7 +1471,7 @@ end @gcsafe_ccall libcublas.cublasZtrsm_v2(handle::cublasHandle_t, side::cublasSideMode_t, uplo::cublasFillMode_t, trans::cublasOperation_t, diag::cublasDiagType_t, m::Cint, n::Cint, - alpha::RefOrCuRef{cuDoubleComplex}, + alpha::CuRef{cuDoubleComplex}, A::CuPtr{cuDoubleComplex}, lda::Cint, B::CuPtr{cuDoubleComplex}, ldb::Cint)::cublasStatus_t @@ -1483,7 +1483,7 @@ end @gcsafe_ccall libcublas.cublasStrmm_v2(handle::cublasHandle_t, side::cublasSideMode_t, uplo::cublasFillMode_t, trans::cublasOperation_t, diag::cublasDiagType_t, m::Cint, n::Cint, - alpha::RefOrCuRef{Cfloat}, A::CuPtr{Cfloat}, + 
alpha::CuRef{Cfloat}, A::CuPtr{Cfloat}, lda::Cint, B::CuPtr{Cfloat}, ldb::Cint, C::CuPtr{Cfloat}, ldc::Cint)::cublasStatus_t end @@ -1494,7 +1494,7 @@ end @gcsafe_ccall libcublas.cublasDtrmm_v2(handle::cublasHandle_t, side::cublasSideMode_t, uplo::cublasFillMode_t, trans::cublasOperation_t, diag::cublasDiagType_t, m::Cint, n::Cint, - alpha::RefOrCuRef{Cdouble}, A::CuPtr{Cdouble}, + alpha::CuRef{Cdouble}, A::CuPtr{Cdouble}, lda::Cint, B::CuPtr{Cdouble}, ldb::Cint, C::CuPtr{Cdouble}, ldc::Cint)::cublasStatus_t end @@ -1505,7 +1505,7 @@ end @gcsafe_ccall libcublas.cublasCtrmm_v2(handle::cublasHandle_t, side::cublasSideMode_t, uplo::cublasFillMode_t, trans::cublasOperation_t, diag::cublasDiagType_t, m::Cint, n::Cint, - alpha::RefOrCuRef{cuComplex}, + alpha::CuRef{cuComplex}, A::CuPtr{cuComplex}, lda::Cint, B::CuPtr{cuComplex}, ldb::Cint, C::CuPtr{cuComplex}, ldc::Cint)::cublasStatus_t @@ -1517,7 +1517,7 @@ end @gcsafe_ccall libcublas.cublasZtrmm_v2(handle::cublasHandle_t, side::cublasSideMode_t, uplo::cublasFillMode_t, trans::cublasOperation_t, diag::cublasDiagType_t, m::Cint, n::Cint, - alpha::RefOrCuRef{cuDoubleComplex}, + alpha::CuRef{cuDoubleComplex}, A::CuPtr{cuDoubleComplex}, lda::Cint, B::CuPtr{cuDoubleComplex}, ldb::Cint, C::CuPtr{cuDoubleComplex}, @@ -1528,28 +1528,28 @@ end initialize_context() @gcsafe_ccall libcublas.cublasSnrm2_v2_64(handle::cublasHandle_t, n::Int64, x::CuPtr{Cfloat}, incx::Int64, - result::RefOrCuRef{Cfloat})::cublasStatus_t + result::CuRef{Cfloat})::cublasStatus_t end @checked function cublasDnrm2_v2_64(handle, n, x, incx, result) initialize_context() @gcsafe_ccall libcublas.cublasDnrm2_v2_64(handle::cublasHandle_t, n::Int64, x::CuPtr{Cdouble}, incx::Int64, - result::RefOrCuRef{Cdouble})::cublasStatus_t + result::CuRef{Cdouble})::cublasStatus_t end @checked function cublasScnrm2_v2_64(handle, n, x, incx, result) initialize_context() @gcsafe_ccall libcublas.cublasScnrm2_v2_64(handle::cublasHandle_t, n::Int64, x::CuPtr{cuComplex}, 
incx::Int64, - result::RefOrCuRef{Cfloat})::cublasStatus_t + result::CuRef{Cfloat})::cublasStatus_t end @checked function cublasDznrm2_v2_64(handle, n, x, incx, result) initialize_context() @gcsafe_ccall libcublas.cublasDznrm2_v2_64(handle::cublasHandle_t, n::Int64, x::CuPtr{cuDoubleComplex}, incx::Int64, - result::RefOrCuRef{Cdouble})::cublasStatus_t + result::CuRef{Cdouble})::cublasStatus_t end @checked function cublasSdot_v2_64(handle, n, x, incx, y, incy, result) @@ -1557,7 +1557,7 @@ end @gcsafe_ccall libcublas.cublasSdot_v2_64(handle::cublasHandle_t, n::Int64, x::CuPtr{Cfloat}, incx::Int64, y::CuPtr{Cfloat}, incy::Int64, - result::RefOrCuRef{Cfloat})::cublasStatus_t + result::CuRef{Cfloat})::cublasStatus_t end @checked function cublasDdot_v2_64(handle, n, x, incx, y, incy, result) @@ -1565,7 +1565,7 @@ end @gcsafe_ccall libcublas.cublasDdot_v2_64(handle::cublasHandle_t, n::Int64, x::CuPtr{Cdouble}, incx::Int64, y::CuPtr{Cdouble}, incy::Int64, - result::RefOrCuRef{Cdouble})::cublasStatus_t + result::CuRef{Cdouble})::cublasStatus_t end @checked function cublasCdotu_v2_64(handle, n, x, incx, y, incy, result) @@ -1573,7 +1573,7 @@ end @gcsafe_ccall libcublas.cublasCdotu_v2_64(handle::cublasHandle_t, n::Int64, x::CuPtr{cuComplex}, incx::Int64, y::CuPtr{cuComplex}, incy::Int64, - result::RefOrCuRef{cuComplex})::cublasStatus_t + result::CuRef{cuComplex})::cublasStatus_t end @checked function cublasCdotc_v2_64(handle, n, x, incx, y, incy, result) @@ -1581,7 +1581,7 @@ end @gcsafe_ccall libcublas.cublasCdotc_v2_64(handle::cublasHandle_t, n::Int64, x::CuPtr{cuComplex}, incx::Int64, y::CuPtr{cuComplex}, incy::Int64, - result::RefOrCuRef{cuComplex})::cublasStatus_t + result::CuRef{cuComplex})::cublasStatus_t end @checked function cublasZdotu_v2_64(handle, n, x, incx, y, incy, result) @@ -1589,7 +1589,7 @@ end @gcsafe_ccall libcublas.cublasZdotu_v2_64(handle::cublasHandle_t, n::Int64, x::CuPtr{cuDoubleComplex}, incx::Int64, y::CuPtr{cuDoubleComplex}, incy::Int64, - 
result::RefOrCuRef{cuDoubleComplex})::cublasStatus_t + result::CuRef{cuDoubleComplex})::cublasStatus_t end @checked function cublasZdotc_v2_64(handle, n, x, incx, y, incy, result) @@ -1597,27 +1597,27 @@ end @gcsafe_ccall libcublas.cublasZdotc_v2_64(handle::cublasHandle_t, n::Int64, x::CuPtr{cuDoubleComplex}, incx::Int64, y::CuPtr{cuDoubleComplex}, incy::Int64, - result::RefOrCuRef{cuDoubleComplex})::cublasStatus_t + result::CuRef{cuDoubleComplex})::cublasStatus_t end @checked function cublasSscal_v2_64(handle, n, alpha, x, incx) initialize_context() @gcsafe_ccall libcublas.cublasSscal_v2_64(handle::cublasHandle_t, n::Int64, - alpha::RefOrCuRef{Cfloat}, x::CuPtr{Cfloat}, + alpha::CuRef{Cfloat}, x::CuPtr{Cfloat}, incx::Int64)::cublasStatus_t end @checked function cublasDscal_v2_64(handle, n, alpha, x, incx) initialize_context() @gcsafe_ccall libcublas.cublasDscal_v2_64(handle::cublasHandle_t, n::Int64, - alpha::RefOrCuRef{Cdouble}, x::CuPtr{Cdouble}, + alpha::CuRef{Cdouble}, x::CuPtr{Cdouble}, incx::Int64)::cublasStatus_t end @checked function cublasCscal_v2_64(handle, n, alpha, x, incx) initialize_context() @gcsafe_ccall libcublas.cublasCscal_v2_64(handle::cublasHandle_t, n::Int64, - alpha::RefOrCuRef{cuComplex}, + alpha::CuRef{cuComplex}, x::CuPtr{cuComplex}, incx::Int64)::cublasStatus_t end @@ -1625,7 +1625,7 @@ end @checked function cublasCsscal_v2_64(handle, n, alpha, x, incx) initialize_context() @gcsafe_ccall libcublas.cublasCsscal_v2_64(handle::cublasHandle_t, n::Int64, - alpha::RefOrCuRef{Cfloat}, + alpha::CuRef{Cfloat}, x::CuPtr{cuComplex}, incx::Int64)::cublasStatus_t end @@ -1633,7 +1633,7 @@ end @checked function cublasZscal_v2_64(handle, n, alpha, x, incx) initialize_context() @gcsafe_ccall libcublas.cublasZscal_v2_64(handle::cublasHandle_t, n::Int64, - alpha::RefOrCuRef{cuDoubleComplex}, + alpha::CuRef{cuDoubleComplex}, x::CuPtr{cuDoubleComplex}, incx::Int64)::cublasStatus_t end @@ -1641,7 +1641,7 @@ end @checked function cublasZdscal_v2_64(handle, n, 
alpha, x, incx) initialize_context() @gcsafe_ccall libcublas.cublasZdscal_v2_64(handle::cublasHandle_t, n::Int64, - alpha::RefOrCuRef{Cdouble}, + alpha::CuRef{Cdouble}, x::CuPtr{cuDoubleComplex}, incx::Int64)::cublasStatus_t end @@ -1649,7 +1649,7 @@ end @checked function cublasSaxpy_v2_64(handle, n, alpha, x, incx, y, incy) initialize_context() @gcsafe_ccall libcublas.cublasSaxpy_v2_64(handle::cublasHandle_t, n::Int64, - alpha::RefOrCuRef{Cfloat}, x::CuPtr{Cfloat}, + alpha::CuRef{Cfloat}, x::CuPtr{Cfloat}, incx::Int64, y::CuPtr{Cfloat}, incy::Int64)::cublasStatus_t end @@ -1657,7 +1657,7 @@ end @checked function cublasDaxpy_v2_64(handle, n, alpha, x, incx, y, incy) initialize_context() @gcsafe_ccall libcublas.cublasDaxpy_v2_64(handle::cublasHandle_t, n::Int64, - alpha::RefOrCuRef{Cdouble}, x::CuPtr{Cdouble}, + alpha::CuRef{Cdouble}, x::CuPtr{Cdouble}, incx::Int64, y::CuPtr{Cdouble}, incy::Int64)::cublasStatus_t end @@ -1665,7 +1665,7 @@ end @checked function cublasCaxpy_v2_64(handle, n, alpha, x, incx, y, incy) initialize_context() @gcsafe_ccall libcublas.cublasCaxpy_v2_64(handle::cublasHandle_t, n::Int64, - alpha::RefOrCuRef{cuComplex}, + alpha::CuRef{cuComplex}, x::CuPtr{cuComplex}, incx::Int64, y::CuPtr{cuComplex}, incy::Int64)::cublasStatus_t @@ -1674,7 +1674,7 @@ end @checked function cublasZaxpy_v2_64(handle, n, alpha, x, incx, y, incy) initialize_context() @gcsafe_ccall libcublas.cublasZaxpy_v2_64(handle::cublasHandle_t, n::Int64, - alpha::RefOrCuRef{cuDoubleComplex}, + alpha::CuRef{cuDoubleComplex}, x::CuPtr{cuDoubleComplex}, incx::Int64, y::CuPtr{cuDoubleComplex}, incy::Int64)::cublasStatus_t @@ -1746,84 +1746,84 @@ end initialize_context() @gcsafe_ccall libcublas.cublasIsamax_v2_64(handle::cublasHandle_t, n::Int64, x::CuPtr{Cfloat}, incx::Int64, - result::RefOrCuRef{Int64})::cublasStatus_t + result::CuRef{Int64})::cublasStatus_t end @checked function cublasIdamax_v2_64(handle, n, x, incx, result) initialize_context() @gcsafe_ccall 
libcublas.cublasIdamax_v2_64(handle::cublasHandle_t, n::Int64, x::CuPtr{Cdouble}, incx::Int64, - result::RefOrCuRef{Int64})::cublasStatus_t + result::CuRef{Int64})::cublasStatus_t end @checked function cublasIcamax_v2_64(handle, n, x, incx, result) initialize_context() @gcsafe_ccall libcublas.cublasIcamax_v2_64(handle::cublasHandle_t, n::Int64, x::CuPtr{cuComplex}, incx::Int64, - result::RefOrCuRef{Int64})::cublasStatus_t + result::CuRef{Int64})::cublasStatus_t end @checked function cublasIzamax_v2_64(handle, n, x, incx, result) initialize_context() @gcsafe_ccall libcublas.cublasIzamax_v2_64(handle::cublasHandle_t, n::Int64, x::CuPtr{cuDoubleComplex}, incx::Int64, - result::RefOrCuRef{Int64})::cublasStatus_t + result::CuRef{Int64})::cublasStatus_t end @checked function cublasIsamin_v2_64(handle, n, x, incx, result) initialize_context() @gcsafe_ccall libcublas.cublasIsamin_v2_64(handle::cublasHandle_t, n::Int64, x::CuPtr{Cfloat}, incx::Int64, - result::RefOrCuRef{Int64})::cublasStatus_t + result::CuRef{Int64})::cublasStatus_t end @checked function cublasIdamin_v2_64(handle, n, x, incx, result) initialize_context() @gcsafe_ccall libcublas.cublasIdamin_v2_64(handle::cublasHandle_t, n::Int64, x::CuPtr{Cdouble}, incx::Int64, - result::RefOrCuRef{Int64})::cublasStatus_t + result::CuRef{Int64})::cublasStatus_t end @checked function cublasIcamin_v2_64(handle, n, x, incx, result) initialize_context() @gcsafe_ccall libcublas.cublasIcamin_v2_64(handle::cublasHandle_t, n::Int64, x::CuPtr{cuComplex}, incx::Int64, - result::RefOrCuRef{Int64})::cublasStatus_t + result::CuRef{Int64})::cublasStatus_t end @checked function cublasIzamin_v2_64(handle, n, x, incx, result) initialize_context() @gcsafe_ccall libcublas.cublasIzamin_v2_64(handle::cublasHandle_t, n::Int64, x::CuPtr{cuDoubleComplex}, incx::Int64, - result::RefOrCuRef{Int64})::cublasStatus_t + result::CuRef{Int64})::cublasStatus_t end @checked function cublasSasum_v2_64(handle, n, x, incx, result) initialize_context() 
@gcsafe_ccall libcublas.cublasSasum_v2_64(handle::cublasHandle_t, n::Int64, x::CuPtr{Cfloat}, incx::Int64, - result::RefOrCuRef{Cfloat})::cublasStatus_t + result::CuRef{Cfloat})::cublasStatus_t end @checked function cublasDasum_v2_64(handle, n, x, incx, result) initialize_context() @gcsafe_ccall libcublas.cublasDasum_v2_64(handle::cublasHandle_t, n::Int64, x::CuPtr{Cdouble}, incx::Int64, - result::RefOrCuRef{Cdouble})::cublasStatus_t + result::CuRef{Cdouble})::cublasStatus_t end @checked function cublasScasum_v2_64(handle, n, x, incx, result) initialize_context() @gcsafe_ccall libcublas.cublasScasum_v2_64(handle::cublasHandle_t, n::Int64, x::CuPtr{cuComplex}, incx::Int64, - result::RefOrCuRef{Cfloat})::cublasStatus_t + result::CuRef{Cfloat})::cublasStatus_t end @checked function cublasDzasum_v2_64(handle, n, x, incx, result) initialize_context() @gcsafe_ccall libcublas.cublasDzasum_v2_64(handle::cublasHandle_t, n::Int64, x::CuPtr{cuDoubleComplex}, incx::Int64, - result::RefOrCuRef{Cdouble})::cublasStatus_t + result::CuRef{Cdouble})::cublasStatus_t end @checked function cublasSrot_v2_64(handle, n, x, incx, y, incy, c, s) @@ -1831,8 +1831,8 @@ end @gcsafe_ccall libcublas.cublasSrot_v2_64(handle::cublasHandle_t, n::Int64, x::CuPtr{Cfloat}, incx::Int64, y::CuPtr{Cfloat}, incy::Int64, - c::RefOrCuRef{Cfloat}, - s::RefOrCuRef{Cfloat})::cublasStatus_t + c::CuRef{Cfloat}, + s::CuRef{Cfloat})::cublasStatus_t end @checked function cublasDrot_v2_64(handle, n, x, incx, y, incy, c, s) @@ -1840,8 +1840,8 @@ end @gcsafe_ccall libcublas.cublasDrot_v2_64(handle::cublasHandle_t, n::Int64, x::CuPtr{Cdouble}, incx::Int64, y::CuPtr{Cdouble}, incy::Int64, - c::RefOrCuRef{Cdouble}, - s::RefOrCuRef{Cdouble})::cublasStatus_t + c::CuRef{Cdouble}, + s::CuRef{Cdouble})::cublasStatus_t end @checked function cublasCrot_v2_64(handle, n, x, incx, y, incy, c, s) @@ -1849,8 +1849,8 @@ end @gcsafe_ccall libcublas.cublasCrot_v2_64(handle::cublasHandle_t, n::Int64, x::CuPtr{cuComplex}, incx::Int64, 
y::CuPtr{cuComplex}, incy::Int64, - c::RefOrCuRef{Cfloat}, - s::RefOrCuRef{cuComplex})::cublasStatus_t + c::CuRef{Cfloat}, + s::CuRef{cuComplex})::cublasStatus_t end @checked function cublasCsrot_v2_64(handle, n, x, incx, y, incy, c, s) @@ -1858,8 +1858,8 @@ end @gcsafe_ccall libcublas.cublasCsrot_v2_64(handle::cublasHandle_t, n::Int64, x::CuPtr{cuComplex}, incx::Int64, y::CuPtr{cuComplex}, incy::Int64, - c::RefOrCuRef{Cfloat}, - s::RefOrCuRef{Cfloat})::cublasStatus_t + c::CuRef{Cfloat}, + s::CuRef{Cfloat})::cublasStatus_t end @checked function cublasZrot_v2_64(handle, n, x, incx, y, incy, c, s) @@ -1867,8 +1867,8 @@ end @gcsafe_ccall libcublas.cublasZrot_v2_64(handle::cublasHandle_t, n::Int64, x::CuPtr{cuDoubleComplex}, incx::Int64, y::CuPtr{cuDoubleComplex}, incy::Int64, - c::RefOrCuRef{Cdouble}, - s::RefOrCuRef{cuDoubleComplex})::cublasStatus_t + c::CuRef{Cdouble}, + s::CuRef{cuDoubleComplex})::cublasStatus_t end @checked function cublasZdrot_v2_64(handle, n, x, incx, y, incy, c, s) @@ -1876,8 +1876,8 @@ end @gcsafe_ccall libcublas.cublasZdrot_v2_64(handle::cublasHandle_t, n::Int64, x::CuPtr{cuDoubleComplex}, incx::Int64, y::CuPtr{cuDoubleComplex}, incy::Int64, - c::RefOrCuRef{Cdouble}, - s::RefOrCuRef{Cdouble})::cublasStatus_t + c::CuRef{Cdouble}, + s::CuRef{Cdouble})::cublasStatus_t end @checked function cublasSrotm_v2_64(handle, n, x, incx, y, incy, param) @@ -1901,9 +1901,9 @@ end initialize_context() @gcsafe_ccall libcublas.cublasSgemv_v2_64(handle::cublasHandle_t, trans::cublasOperation_t, m::Int64, n::Int64, - alpha::RefOrCuRef{Cfloat}, A::CuPtr{Cfloat}, + alpha::CuRef{Cfloat}, A::CuPtr{Cfloat}, lda::Int64, x::CuPtr{Cfloat}, incx::Int64, - beta::RefOrCuRef{Cfloat}, y::CuPtr{Cfloat}, + beta::CuRef{Cfloat}, y::CuPtr{Cfloat}, incy::Int64)::cublasStatus_t end @@ -1912,9 +1912,9 @@ end initialize_context() @gcsafe_ccall libcublas.cublasDgemv_v2_64(handle::cublasHandle_t, trans::cublasOperation_t, m::Int64, n::Int64, - alpha::RefOrCuRef{Cdouble}, 
A::CuPtr{Cdouble}, + alpha::CuRef{Cdouble}, A::CuPtr{Cdouble}, lda::Int64, x::CuPtr{Cdouble}, incx::Int64, - beta::RefOrCuRef{Cdouble}, y::CuPtr{Cdouble}, + beta::CuRef{Cdouble}, y::CuPtr{Cdouble}, incy::Int64)::cublasStatus_t end @@ -1923,10 +1923,10 @@ end initialize_context() @gcsafe_ccall libcublas.cublasCgemv_v2_64(handle::cublasHandle_t, trans::cublasOperation_t, m::Int64, n::Int64, - alpha::RefOrCuRef{cuComplex}, + alpha::CuRef{cuComplex}, A::CuPtr{cuComplex}, lda::Int64, x::CuPtr{cuComplex}, incx::Int64, - beta::RefOrCuRef{cuComplex}, + beta::CuRef{cuComplex}, y::CuPtr{cuComplex}, incy::Int64)::cublasStatus_t end @@ -1936,10 +1936,10 @@ end initialize_context() @gcsafe_ccall libcublas.cublasZgemv_v2_64(handle::cublasHandle_t, trans::cublasOperation_t, m::Int64, n::Int64, - alpha::RefOrCuRef{cuDoubleComplex}, + alpha::CuRef{cuDoubleComplex}, A::CuPtr{cuDoubleComplex}, lda::Int64, x::CuPtr{cuDoubleComplex}, incx::Int64, - beta::RefOrCuRef{cuDoubleComplex}, + beta::CuRef{cuDoubleComplex}, y::CuPtr{cuDoubleComplex}, incy::Int64)::cublasStatus_t end @@ -1950,9 +1950,9 @@ end @gcsafe_ccall libcublas.cublasSgbmv_v2_64(handle::cublasHandle_t, trans::cublasOperation_t, m::Int64, n::Int64, kl::Int64, ku::Int64, - alpha::RefOrCuRef{Cfloat}, A::CuPtr{Cfloat}, + alpha::CuRef{Cfloat}, A::CuPtr{Cfloat}, lda::Int64, x::CuPtr{Cfloat}, incx::Int64, - beta::RefOrCuRef{Cfloat}, y::CuPtr{Cfloat}, + beta::CuRef{Cfloat}, y::CuPtr{Cfloat}, incy::Int64)::cublasStatus_t end @@ -1962,9 +1962,9 @@ end @gcsafe_ccall libcublas.cublasDgbmv_v2_64(handle::cublasHandle_t, trans::cublasOperation_t, m::Int64, n::Int64, kl::Int64, ku::Int64, - alpha::RefOrCuRef{Cdouble}, A::CuPtr{Cdouble}, + alpha::CuRef{Cdouble}, A::CuPtr{Cdouble}, lda::Int64, x::CuPtr{Cdouble}, incx::Int64, - beta::RefOrCuRef{Cdouble}, y::CuPtr{Cdouble}, + beta::CuRef{Cdouble}, y::CuPtr{Cdouble}, incy::Int64)::cublasStatus_t end @@ -1974,10 +1974,10 @@ end @gcsafe_ccall libcublas.cublasCgbmv_v2_64(handle::cublasHandle_t, 
trans::cublasOperation_t, m::Int64, n::Int64, kl::Int64, ku::Int64, - alpha::RefOrCuRef{cuComplex}, + alpha::CuRef{cuComplex}, A::CuPtr{cuComplex}, lda::Int64, x::CuPtr{cuComplex}, incx::Int64, - beta::RefOrCuRef{cuComplex}, + beta::CuRef{cuComplex}, y::CuPtr{cuComplex}, incy::Int64)::cublasStatus_t end @@ -1988,10 +1988,10 @@ end @gcsafe_ccall libcublas.cublasZgbmv_v2_64(handle::cublasHandle_t, trans::cublasOperation_t, m::Int64, n::Int64, kl::Int64, ku::Int64, - alpha::RefOrCuRef{cuDoubleComplex}, + alpha::CuRef{cuDoubleComplex}, A::CuPtr{cuDoubleComplex}, lda::Int64, x::CuPtr{cuDoubleComplex}, incx::Int64, - beta::RefOrCuRef{cuDoubleComplex}, + beta::CuRef{cuDoubleComplex}, y::CuPtr{cuDoubleComplex}, incy::Int64)::cublasStatus_t end @@ -2254,9 +2254,9 @@ end initialize_context() @gcsafe_ccall libcublas.cublasSsymv_v2_64(handle::cublasHandle_t, uplo::cublasFillMode_t, n::Int64, - alpha::RefOrCuRef{Cfloat}, A::CuPtr{Cfloat}, + alpha::CuRef{Cfloat}, A::CuPtr{Cfloat}, lda::Int64, x::CuPtr{Cfloat}, incx::Int64, - beta::RefOrCuRef{Cfloat}, y::CuPtr{Cfloat}, + beta::CuRef{Cfloat}, y::CuPtr{Cfloat}, incy::Int64)::cublasStatus_t end @@ -2264,9 +2264,9 @@ end initialize_context() @gcsafe_ccall libcublas.cublasDsymv_v2_64(handle::cublasHandle_t, uplo::cublasFillMode_t, n::Int64, - alpha::RefOrCuRef{Cdouble}, A::CuPtr{Cdouble}, + alpha::CuRef{Cdouble}, A::CuPtr{Cdouble}, lda::Int64, x::CuPtr{Cdouble}, incx::Int64, - beta::RefOrCuRef{Cdouble}, y::CuPtr{Cdouble}, + beta::CuRef{Cdouble}, y::CuPtr{Cdouble}, incy::Int64)::cublasStatus_t end @@ -2274,10 +2274,10 @@ end initialize_context() @gcsafe_ccall libcublas.cublasCsymv_v2_64(handle::cublasHandle_t, uplo::cublasFillMode_t, n::Int64, - alpha::RefOrCuRef{cuComplex}, + alpha::CuRef{cuComplex}, A::CuPtr{cuComplex}, lda::Int64, x::CuPtr{cuComplex}, incx::Int64, - beta::RefOrCuRef{cuComplex}, + beta::CuRef{cuComplex}, y::CuPtr{cuComplex}, incy::Int64)::cublasStatus_t end @@ -2286,10 +2286,10 @@ end initialize_context() 
@gcsafe_ccall libcublas.cublasZsymv_v2_64(handle::cublasHandle_t, uplo::cublasFillMode_t, n::Int64, - alpha::RefOrCuRef{cuDoubleComplex}, + alpha::CuRef{cuDoubleComplex}, A::CuPtr{cuDoubleComplex}, lda::Int64, x::CuPtr{cuDoubleComplex}, incx::Int64, - beta::RefOrCuRef{cuDoubleComplex}, + beta::CuRef{cuDoubleComplex}, y::CuPtr{cuDoubleComplex}, incy::Int64)::cublasStatus_t end @@ -2298,10 +2298,10 @@ end initialize_context() @gcsafe_ccall libcublas.cublasChemv_v2_64(handle::cublasHandle_t, uplo::cublasFillMode_t, n::Int64, - alpha::RefOrCuRef{cuComplex}, + alpha::CuRef{cuComplex}, A::CuPtr{cuComplex}, lda::Int64, x::CuPtr{cuComplex}, incx::Int64, - beta::RefOrCuRef{cuComplex}, + beta::CuRef{cuComplex}, y::CuPtr{cuComplex}, incy::Int64)::cublasStatus_t end @@ -2310,10 +2310,10 @@ end initialize_context() @gcsafe_ccall libcublas.cublasZhemv_v2_64(handle::cublasHandle_t, uplo::cublasFillMode_t, n::Int64, - alpha::RefOrCuRef{cuDoubleComplex}, + alpha::CuRef{cuDoubleComplex}, A::CuPtr{cuDoubleComplex}, lda::Int64, x::CuPtr{cuDoubleComplex}, incx::Int64, - beta::RefOrCuRef{cuDoubleComplex}, + beta::CuRef{cuDoubleComplex}, y::CuPtr{cuDoubleComplex}, incy::Int64)::cublasStatus_t end @@ -2323,9 +2323,9 @@ end initialize_context() @gcsafe_ccall libcublas.cublasSsbmv_v2_64(handle::cublasHandle_t, uplo::cublasFillMode_t, n::Int64, k::Int64, - alpha::RefOrCuRef{Cfloat}, A::CuPtr{Cfloat}, + alpha::CuRef{Cfloat}, A::CuPtr{Cfloat}, lda::Int64, x::CuPtr{Cfloat}, incx::Int64, - beta::RefOrCuRef{Cfloat}, y::CuPtr{Cfloat}, + beta::CuRef{Cfloat}, y::CuPtr{Cfloat}, incy::Int64)::cublasStatus_t end @@ -2334,9 +2334,9 @@ end initialize_context() @gcsafe_ccall libcublas.cublasDsbmv_v2_64(handle::cublasHandle_t, uplo::cublasFillMode_t, n::Int64, k::Int64, - alpha::RefOrCuRef{Cdouble}, A::CuPtr{Cdouble}, + alpha::CuRef{Cdouble}, A::CuPtr{Cdouble}, lda::Int64, x::CuPtr{Cdouble}, incx::Int64, - beta::RefOrCuRef{Cdouble}, y::CuPtr{Cdouble}, + beta::CuRef{Cdouble}, y::CuPtr{Cdouble}, 
incy::Int64)::cublasStatus_t end @@ -2345,10 +2345,10 @@ end initialize_context() @gcsafe_ccall libcublas.cublasChbmv_v2_64(handle::cublasHandle_t, uplo::cublasFillMode_t, n::Int64, k::Int64, - alpha::RefOrCuRef{cuComplex}, + alpha::CuRef{cuComplex}, A::CuPtr{cuComplex}, lda::Int64, x::CuPtr{cuComplex}, incx::Int64, - beta::RefOrCuRef{cuComplex}, + beta::CuRef{cuComplex}, y::CuPtr{cuComplex}, incy::Int64)::cublasStatus_t end @@ -2358,10 +2358,10 @@ end initialize_context() @gcsafe_ccall libcublas.cublasZhbmv_v2_64(handle::cublasHandle_t, uplo::cublasFillMode_t, n::Int64, k::Int64, - alpha::RefOrCuRef{cuDoubleComplex}, + alpha::CuRef{cuDoubleComplex}, A::CuPtr{cuDoubleComplex}, lda::Int64, x::CuPtr{cuDoubleComplex}, incx::Int64, - beta::RefOrCuRef{cuDoubleComplex}, + beta::CuRef{cuDoubleComplex}, y::CuPtr{cuDoubleComplex}, incy::Int64)::cublasStatus_t end @@ -2370,9 +2370,9 @@ end initialize_context() @gcsafe_ccall libcublas.cublasSspmv_v2_64(handle::cublasHandle_t, uplo::cublasFillMode_t, n::Int64, - alpha::RefOrCuRef{Cfloat}, AP::CuPtr{Cfloat}, + alpha::CuRef{Cfloat}, AP::CuPtr{Cfloat}, x::CuPtr{Cfloat}, incx::Int64, - beta::RefOrCuRef{Cfloat}, y::CuPtr{Cfloat}, + beta::CuRef{Cfloat}, y::CuPtr{Cfloat}, incy::Int64)::cublasStatus_t end @@ -2380,9 +2380,9 @@ end initialize_context() @gcsafe_ccall libcublas.cublasDspmv_v2_64(handle::cublasHandle_t, uplo::cublasFillMode_t, n::Int64, - alpha::RefOrCuRef{Cdouble}, + alpha::CuRef{Cdouble}, AP::CuPtr{Cdouble}, x::CuPtr{Cdouble}, - incx::Int64, beta::RefOrCuRef{Cdouble}, + incx::Int64, beta::CuRef{Cdouble}, y::CuPtr{Cdouble}, incy::Int64)::cublasStatus_t end @@ -2391,9 +2391,9 @@ end initialize_context() @gcsafe_ccall libcublas.cublasChpmv_v2_64(handle::cublasHandle_t, uplo::cublasFillMode_t, n::Int64, - alpha::RefOrCuRef{cuComplex}, + alpha::CuRef{cuComplex}, AP::CuPtr{cuComplex}, x::CuPtr{cuComplex}, - incx::Int64, beta::RefOrCuRef{cuComplex}, + incx::Int64, beta::CuRef{cuComplex}, y::CuPtr{cuComplex}, 
incy::Int64)::cublasStatus_t end @@ -2402,10 +2402,10 @@ end initialize_context() @gcsafe_ccall libcublas.cublasZhpmv_v2_64(handle::cublasHandle_t, uplo::cublasFillMode_t, n::Int64, - alpha::RefOrCuRef{cuDoubleComplex}, + alpha::CuRef{cuDoubleComplex}, AP::CuPtr{cuDoubleComplex}, x::CuPtr{cuDoubleComplex}, incx::Int64, - beta::RefOrCuRef{cuDoubleComplex}, + beta::CuRef{cuDoubleComplex}, y::CuPtr{cuDoubleComplex}, incy::Int64)::cublasStatus_t end @@ -2413,7 +2413,7 @@ end @checked function cublasSger_v2_64(handle, m, n, alpha, x, incx, y, incy, A, lda) initialize_context() @gcsafe_ccall libcublas.cublasSger_v2_64(handle::cublasHandle_t, m::Int64, n::Int64, - alpha::RefOrCuRef{Cfloat}, x::CuPtr{Cfloat}, + alpha::CuRef{Cfloat}, x::CuPtr{Cfloat}, incx::Int64, y::CuPtr{Cfloat}, incy::Int64, A::CuPtr{Cfloat}, lda::Int64)::cublasStatus_t end @@ -2421,7 +2421,7 @@ end @checked function cublasDger_v2_64(handle, m, n, alpha, x, incx, y, incy, A, lda) initialize_context() @gcsafe_ccall libcublas.cublasDger_v2_64(handle::cublasHandle_t, m::Int64, n::Int64, - alpha::RefOrCuRef{Cdouble}, x::CuPtr{Cdouble}, + alpha::CuRef{Cdouble}, x::CuPtr{Cdouble}, incx::Int64, y::CuPtr{Cdouble}, incy::Int64, A::CuPtr{Cdouble}, lda::Int64)::cublasStatus_t end @@ -2429,7 +2429,7 @@ end @checked function cublasCgeru_v2_64(handle, m, n, alpha, x, incx, y, incy, A, lda) initialize_context() @gcsafe_ccall libcublas.cublasCgeru_v2_64(handle::cublasHandle_t, m::Int64, n::Int64, - alpha::RefOrCuRef{cuComplex}, + alpha::CuRef{cuComplex}, x::CuPtr{cuComplex}, incx::Int64, y::CuPtr{cuComplex}, incy::Int64, A::CuPtr{cuComplex}, @@ -2439,7 +2439,7 @@ end @checked function cublasCgerc_v2_64(handle, m, n, alpha, x, incx, y, incy, A, lda) initialize_context() @gcsafe_ccall libcublas.cublasCgerc_v2_64(handle::cublasHandle_t, m::Int64, n::Int64, - alpha::RefOrCuRef{cuComplex}, + alpha::CuRef{cuComplex}, x::CuPtr{cuComplex}, incx::Int64, y::CuPtr{cuComplex}, incy::Int64, A::CuPtr{cuComplex}, @@ -2449,7 +2449,7 @@ 
end @checked function cublasZgeru_v2_64(handle, m, n, alpha, x, incx, y, incy, A, lda) initialize_context() @gcsafe_ccall libcublas.cublasZgeru_v2_64(handle::cublasHandle_t, m::Int64, n::Int64, - alpha::RefOrCuRef{cuDoubleComplex}, + alpha::CuRef{cuDoubleComplex}, x::CuPtr{cuDoubleComplex}, incx::Int64, y::CuPtr{cuDoubleComplex}, incy::Int64, A::CuPtr{cuDoubleComplex}, @@ -2459,7 +2459,7 @@ end @checked function cublasZgerc_v2_64(handle, m, n, alpha, x, incx, y, incy, A, lda) initialize_context() @gcsafe_ccall libcublas.cublasZgerc_v2_64(handle::cublasHandle_t, m::Int64, n::Int64, - alpha::RefOrCuRef{cuDoubleComplex}, + alpha::CuRef{cuDoubleComplex}, x::CuPtr{cuDoubleComplex}, incx::Int64, y::CuPtr{cuDoubleComplex}, incy::Int64, A::CuPtr{cuDoubleComplex}, @@ -2469,7 +2469,7 @@ end @checked function cublasSsyr_v2_64(handle, uplo, n, alpha, x, incx, A, lda) initialize_context() @gcsafe_ccall libcublas.cublasSsyr_v2_64(handle::cublasHandle_t, uplo::cublasFillMode_t, - n::Int64, alpha::RefOrCuRef{Cfloat}, + n::Int64, alpha::CuRef{Cfloat}, x::CuPtr{Cfloat}, incx::Int64, A::CuPtr{Cfloat}, lda::Int64)::cublasStatus_t end @@ -2477,7 +2477,7 @@ end @checked function cublasDsyr_v2_64(handle, uplo, n, alpha, x, incx, A, lda) initialize_context() @gcsafe_ccall libcublas.cublasDsyr_v2_64(handle::cublasHandle_t, uplo::cublasFillMode_t, - n::Int64, alpha::RefOrCuRef{Cdouble}, + n::Int64, alpha::CuRef{Cdouble}, x::CuPtr{Cdouble}, incx::Int64, A::CuPtr{Cdouble}, lda::Int64)::cublasStatus_t end @@ -2485,7 +2485,7 @@ end @checked function cublasCsyr_v2_64(handle, uplo, n, alpha, x, incx, A, lda) initialize_context() @gcsafe_ccall libcublas.cublasCsyr_v2_64(handle::cublasHandle_t, uplo::cublasFillMode_t, - n::Int64, alpha::RefOrCuRef{cuComplex}, + n::Int64, alpha::CuRef{cuComplex}, x::CuPtr{cuComplex}, incx::Int64, A::CuPtr{cuComplex}, lda::Int64)::cublasStatus_t @@ -2494,7 +2494,7 @@ end @checked function cublasZsyr_v2_64(handle, uplo, n, alpha, x, incx, A, lda) initialize_context() 
@gcsafe_ccall libcublas.cublasZsyr_v2_64(handle::cublasHandle_t, uplo::cublasFillMode_t, - n::Int64, alpha::RefOrCuRef{cuDoubleComplex}, + n::Int64, alpha::CuRef{cuDoubleComplex}, x::CuPtr{cuDoubleComplex}, incx::Int64, A::CuPtr{cuDoubleComplex}, lda::Int64)::cublasStatus_t @@ -2503,7 +2503,7 @@ end @checked function cublasCher_v2_64(handle, uplo, n, alpha, x, incx, A, lda) initialize_context() @gcsafe_ccall libcublas.cublasCher_v2_64(handle::cublasHandle_t, uplo::cublasFillMode_t, - n::Int64, alpha::RefOrCuRef{Cfloat}, + n::Int64, alpha::CuRef{Cfloat}, x::CuPtr{cuComplex}, incx::Int64, A::CuPtr{cuComplex}, lda::Int64)::cublasStatus_t @@ -2512,7 +2512,7 @@ end @checked function cublasZher_v2_64(handle, uplo, n, alpha, x, incx, A, lda) initialize_context() @gcsafe_ccall libcublas.cublasZher_v2_64(handle::cublasHandle_t, uplo::cublasFillMode_t, - n::Int64, alpha::RefOrCuRef{Cdouble}, + n::Int64, alpha::CuRef{Cdouble}, x::CuPtr{cuDoubleComplex}, incx::Int64, A::CuPtr{cuDoubleComplex}, lda::Int64)::cublasStatus_t @@ -2521,7 +2521,7 @@ end @checked function cublasSspr_v2_64(handle, uplo, n, alpha, x, incx, AP) initialize_context() @gcsafe_ccall libcublas.cublasSspr_v2_64(handle::cublasHandle_t, uplo::cublasFillMode_t, - n::Int64, alpha::RefOrCuRef{Cfloat}, + n::Int64, alpha::CuRef{Cfloat}, x::CuPtr{Cfloat}, incx::Int64, AP::CuPtr{Cfloat})::cublasStatus_t end @@ -2529,7 +2529,7 @@ end @checked function cublasDspr_v2_64(handle, uplo, n, alpha, x, incx, AP) initialize_context() @gcsafe_ccall libcublas.cublasDspr_v2_64(handle::cublasHandle_t, uplo::cublasFillMode_t, - n::Int64, alpha::RefOrCuRef{Cdouble}, + n::Int64, alpha::CuRef{Cdouble}, x::CuPtr{Cdouble}, incx::Int64, AP::CuPtr{Cdouble})::cublasStatus_t end @@ -2537,7 +2537,7 @@ end @checked function cublasChpr_v2_64(handle, uplo, n, alpha, x, incx, AP) initialize_context() @gcsafe_ccall libcublas.cublasChpr_v2_64(handle::cublasHandle_t, uplo::cublasFillMode_t, - n::Int64, alpha::RefOrCuRef{Cfloat}, + n::Int64, 
alpha::CuRef{Cfloat}, x::CuPtr{cuComplex}, incx::Int64, AP::CuPtr{cuComplex})::cublasStatus_t end @@ -2545,7 +2545,7 @@ end @checked function cublasZhpr_v2_64(handle, uplo, n, alpha, x, incx, AP) initialize_context() @gcsafe_ccall libcublas.cublasZhpr_v2_64(handle::cublasHandle_t, uplo::cublasFillMode_t, - n::Int64, alpha::RefOrCuRef{Cdouble}, + n::Int64, alpha::CuRef{Cdouble}, x::CuPtr{cuDoubleComplex}, incx::Int64, AP::CuPtr{cuDoubleComplex})::cublasStatus_t end @@ -2554,7 +2554,7 @@ end initialize_context() @gcsafe_ccall libcublas.cublasSsyr2_v2_64(handle::cublasHandle_t, uplo::cublasFillMode_t, n::Int64, - alpha::RefOrCuRef{Cfloat}, x::CuPtr{Cfloat}, + alpha::CuRef{Cfloat}, x::CuPtr{Cfloat}, incx::Int64, y::CuPtr{Cfloat}, incy::Int64, A::CuPtr{Cfloat}, lda::Int64)::cublasStatus_t end @@ -2563,7 +2563,7 @@ end initialize_context() @gcsafe_ccall libcublas.cublasDsyr2_v2_64(handle::cublasHandle_t, uplo::cublasFillMode_t, n::Int64, - alpha::RefOrCuRef{Cdouble}, x::CuPtr{Cdouble}, + alpha::CuRef{Cdouble}, x::CuPtr{Cdouble}, incx::Int64, y::CuPtr{Cdouble}, incy::Int64, A::CuPtr{Cdouble}, lda::Int64)::cublasStatus_t end @@ -2572,7 +2572,7 @@ end initialize_context() @gcsafe_ccall libcublas.cublasCsyr2_v2_64(handle::cublasHandle_t, uplo::cublasFillMode_t, n::Int64, - alpha::RefOrCuRef{cuComplex}, + alpha::CuRef{cuComplex}, x::CuPtr{cuComplex}, incx::Int64, y::CuPtr{cuComplex}, incy::Int64, A::CuPtr{cuComplex}, @@ -2583,7 +2583,7 @@ end initialize_context() @gcsafe_ccall libcublas.cublasZsyr2_v2_64(handle::cublasHandle_t, uplo::cublasFillMode_t, n::Int64, - alpha::RefOrCuRef{cuDoubleComplex}, + alpha::CuRef{cuDoubleComplex}, x::CuPtr{cuDoubleComplex}, incx::Int64, y::CuPtr{cuDoubleComplex}, incy::Int64, A::CuPtr{cuDoubleComplex}, @@ -2594,7 +2594,7 @@ end initialize_context() @gcsafe_ccall libcublas.cublasCher2_v2_64(handle::cublasHandle_t, uplo::cublasFillMode_t, n::Int64, - alpha::RefOrCuRef{cuComplex}, + alpha::CuRef{cuComplex}, x::CuPtr{cuComplex}, incx::Int64, 
y::CuPtr{cuComplex}, incy::Int64, A::CuPtr{cuComplex}, @@ -2605,7 +2605,7 @@ end initialize_context() @gcsafe_ccall libcublas.cublasZher2_v2_64(handle::cublasHandle_t, uplo::cublasFillMode_t, n::Int64, - alpha::RefOrCuRef{cuDoubleComplex}, + alpha::CuRef{cuDoubleComplex}, x::CuPtr{cuDoubleComplex}, incx::Int64, y::CuPtr{cuDoubleComplex}, incy::Int64, A::CuPtr{cuDoubleComplex}, @@ -2616,7 +2616,7 @@ end initialize_context() @gcsafe_ccall libcublas.cublasSspr2_v2_64(handle::cublasHandle_t, uplo::cublasFillMode_t, n::Int64, - alpha::RefOrCuRef{Cfloat}, x::CuPtr{Cfloat}, + alpha::CuRef{Cfloat}, x::CuPtr{Cfloat}, incx::Int64, y::CuPtr{Cfloat}, incy::Int64, AP::CuPtr{Cfloat})::cublasStatus_t end @@ -2625,7 +2625,7 @@ end initialize_context() @gcsafe_ccall libcublas.cublasDspr2_v2_64(handle::cublasHandle_t, uplo::cublasFillMode_t, n::Int64, - alpha::RefOrCuRef{Cdouble}, x::CuPtr{Cdouble}, + alpha::CuRef{Cdouble}, x::CuPtr{Cdouble}, incx::Int64, y::CuPtr{Cdouble}, incy::Int64, AP::CuPtr{Cdouble})::cublasStatus_t end @@ -2634,7 +2634,7 @@ end initialize_context() @gcsafe_ccall libcublas.cublasChpr2_v2_64(handle::cublasHandle_t, uplo::cublasFillMode_t, n::Int64, - alpha::RefOrCuRef{cuComplex}, + alpha::CuRef{cuComplex}, x::CuPtr{cuComplex}, incx::Int64, y::CuPtr{cuComplex}, incy::Int64, AP::CuPtr{cuComplex})::cublasStatus_t @@ -2644,7 +2644,7 @@ end initialize_context() @gcsafe_ccall libcublas.cublasZhpr2_v2_64(handle::cublasHandle_t, uplo::cublasFillMode_t, n::Int64, - alpha::RefOrCuRef{cuDoubleComplex}, + alpha::CuRef{cuDoubleComplex}, x::CuPtr{cuDoubleComplex}, incx::Int64, y::CuPtr{cuDoubleComplex}, incy::Int64, AP::CuPtr{cuDoubleComplex})::cublasStatus_t @@ -2656,10 +2656,10 @@ end @gcsafe_ccall libcublas.cublasSgemm_v2_64(handle::cublasHandle_t, transa::cublasOperation_t, transb::cublasOperation_t, m::Int64, n::Int64, - k::Int64, alpha::RefOrCuRef{Cfloat}, + k::Int64, alpha::CuRef{Cfloat}, A::CuPtr{Cfloat}, lda::Int64, B::CuPtr{Cfloat}, ldb::Int64, - 
beta::RefOrCuRef{Cfloat}, C::CuPtr{Cfloat}, + beta::CuRef{Cfloat}, C::CuPtr{Cfloat}, ldc::Int64)::cublasStatus_t end @@ -2669,10 +2669,10 @@ end @gcsafe_ccall libcublas.cublasDgemm_v2_64(handle::cublasHandle_t, transa::cublasOperation_t, transb::cublasOperation_t, m::Int64, n::Int64, - k::Int64, alpha::RefOrCuRef{Cdouble}, + k::Int64, alpha::CuRef{Cdouble}, A::CuPtr{Cdouble}, lda::Int64, B::CuPtr{Cdouble}, ldb::Int64, - beta::RefOrCuRef{Cdouble}, C::CuPtr{Cdouble}, + beta::CuRef{Cdouble}, C::CuPtr{Cdouble}, ldc::Int64)::cublasStatus_t end @@ -2682,10 +2682,10 @@ end @gcsafe_ccall libcublas.cublasCgemm_v2_64(handle::cublasHandle_t, transa::cublasOperation_t, transb::cublasOperation_t, m::Int64, n::Int64, - k::Int64, alpha::RefOrCuRef{cuComplex}, + k::Int64, alpha::CuRef{cuComplex}, A::CuPtr{cuComplex}, lda::Int64, B::CuPtr{cuComplex}, ldb::Int64, - beta::RefOrCuRef{cuComplex}, + beta::CuRef{cuComplex}, C::CuPtr{cuComplex}, ldc::Int64)::cublasStatus_t end @@ -2696,10 +2696,10 @@ end @gcsafe_ccall libcublas.cublasZgemm_v2_64(handle::cublasHandle_t, transa::cublasOperation_t, transb::cublasOperation_t, m::Int64, n::Int64, - k::Int64, alpha::RefOrCuRef{cuDoubleComplex}, + k::Int64, alpha::CuRef{cuDoubleComplex}, A::CuPtr{cuDoubleComplex}, lda::Int64, B::CuPtr{cuDoubleComplex}, ldb::Int64, - beta::RefOrCuRef{cuDoubleComplex}, + beta::CuRef{cuDoubleComplex}, C::CuPtr{cuDoubleComplex}, ldc::Int64)::cublasStatus_t end @@ -2709,8 +2709,8 @@ end @gcsafe_ccall libcublas.cublasSsyrk_v2_64(handle::cublasHandle_t, uplo::cublasFillMode_t, trans::cublasOperation_t, n::Int64, k::Int64, - alpha::RefOrCuRef{Cfloat}, A::CuPtr{Cfloat}, - lda::Int64, beta::RefOrCuRef{Cfloat}, + alpha::CuRef{Cfloat}, A::CuPtr{Cfloat}, + lda::Int64, beta::CuRef{Cfloat}, C::CuPtr{Cfloat}, ldc::Int64)::cublasStatus_t end @@ -2719,8 +2719,8 @@ end @gcsafe_ccall libcublas.cublasDsyrk_v2_64(handle::cublasHandle_t, uplo::cublasFillMode_t, trans::cublasOperation_t, n::Int64, k::Int64, - 
alpha::RefOrCuRef{Cdouble}, A::CuPtr{Cdouble}, - lda::Int64, beta::RefOrCuRef{Cdouble}, + alpha::CuRef{Cdouble}, A::CuPtr{Cdouble}, + lda::Int64, beta::CuRef{Cdouble}, C::CuPtr{Cdouble}, ldc::Int64)::cublasStatus_t end @@ -2729,9 +2729,9 @@ end @gcsafe_ccall libcublas.cublasCsyrk_v2_64(handle::cublasHandle_t, uplo::cublasFillMode_t, trans::cublasOperation_t, n::Int64, k::Int64, - alpha::RefOrCuRef{cuComplex}, + alpha::CuRef{cuComplex}, A::CuPtr{cuComplex}, lda::Int64, - beta::RefOrCuRef{cuComplex}, + beta::CuRef{cuComplex}, C::CuPtr{cuComplex}, ldc::Int64)::cublasStatus_t end @@ -2741,9 +2741,9 @@ end @gcsafe_ccall libcublas.cublasZsyrk_v2_64(handle::cublasHandle_t, uplo::cublasFillMode_t, trans::cublasOperation_t, n::Int64, k::Int64, - alpha::RefOrCuRef{cuDoubleComplex}, + alpha::CuRef{cuDoubleComplex}, A::CuPtr{cuDoubleComplex}, lda::Int64, - beta::RefOrCuRef{cuDoubleComplex}, + beta::CuRef{cuDoubleComplex}, C::CuPtr{cuDoubleComplex}, ldc::Int64)::cublasStatus_t end @@ -2753,9 +2753,9 @@ end @gcsafe_ccall libcublas.cublasCherk_v2_64(handle::cublasHandle_t, uplo::cublasFillMode_t, trans::cublasOperation_t, n::Int64, k::Int64, - alpha::RefOrCuRef{Cfloat}, + alpha::CuRef{Cfloat}, A::CuPtr{cuComplex}, lda::Int64, - beta::RefOrCuRef{Cfloat}, C::CuPtr{cuComplex}, + beta::CuRef{Cfloat}, C::CuPtr{cuComplex}, ldc::Int64)::cublasStatus_t end @@ -2764,9 +2764,9 @@ end @gcsafe_ccall libcublas.cublasZherk_v2_64(handle::cublasHandle_t, uplo::cublasFillMode_t, trans::cublasOperation_t, n::Int64, k::Int64, - alpha::RefOrCuRef{Cdouble}, + alpha::CuRef{Cdouble}, A::CuPtr{cuDoubleComplex}, lda::Int64, - beta::RefOrCuRef{Cdouble}, + beta::CuRef{Cdouble}, C::CuPtr{cuDoubleComplex}, ldc::Int64)::cublasStatus_t end @@ -2777,9 +2777,9 @@ end @gcsafe_ccall libcublas.cublasSsyr2k_v2_64(handle::cublasHandle_t, uplo::cublasFillMode_t, trans::cublasOperation_t, n::Int64, k::Int64, - alpha::RefOrCuRef{Cfloat}, A::CuPtr{Cfloat}, + alpha::CuRef{Cfloat}, A::CuPtr{Cfloat}, lda::Int64, 
B::CuPtr{Cfloat}, ldb::Int64, - beta::RefOrCuRef{Cfloat}, C::CuPtr{Cfloat}, + beta::CuRef{Cfloat}, C::CuPtr{Cfloat}, ldc::Int64)::cublasStatus_t end @@ -2789,10 +2789,10 @@ end @gcsafe_ccall libcublas.cublasDsyr2k_v2_64(handle::cublasHandle_t, uplo::cublasFillMode_t, trans::cublasOperation_t, n::Int64, k::Int64, - alpha::RefOrCuRef{Cdouble}, + alpha::CuRef{Cdouble}, A::CuPtr{Cdouble}, lda::Int64, B::CuPtr{Cdouble}, ldb::Int64, - beta::RefOrCuRef{Cdouble}, C::CuPtr{Cdouble}, + beta::CuRef{Cdouble}, C::CuPtr{Cdouble}, ldc::Int64)::cublasStatus_t end @@ -2802,10 +2802,10 @@ end @gcsafe_ccall libcublas.cublasCsyr2k_v2_64(handle::cublasHandle_t, uplo::cublasFillMode_t, trans::cublasOperation_t, n::Int64, k::Int64, - alpha::RefOrCuRef{cuComplex}, + alpha::CuRef{cuComplex}, A::CuPtr{cuComplex}, lda::Int64, B::CuPtr{cuComplex}, ldb::Int64, - beta::RefOrCuRef{cuComplex}, + beta::CuRef{cuComplex}, C::CuPtr{cuComplex}, ldc::Int64)::cublasStatus_t end @@ -2816,10 +2816,10 @@ end @gcsafe_ccall libcublas.cublasZsyr2k_v2_64(handle::cublasHandle_t, uplo::cublasFillMode_t, trans::cublasOperation_t, n::Int64, k::Int64, - alpha::RefOrCuRef{cuDoubleComplex}, + alpha::CuRef{cuDoubleComplex}, A::CuPtr{cuDoubleComplex}, lda::Int64, B::CuPtr{cuDoubleComplex}, ldb::Int64, - beta::RefOrCuRef{cuDoubleComplex}, + beta::CuRef{cuDoubleComplex}, C::CuPtr{cuDoubleComplex}, ldc::Int64)::cublasStatus_t end @@ -2830,10 +2830,10 @@ end @gcsafe_ccall libcublas.cublasCher2k_v2_64(handle::cublasHandle_t, uplo::cublasFillMode_t, trans::cublasOperation_t, n::Int64, k::Int64, - alpha::RefOrCuRef{cuComplex}, + alpha::CuRef{cuComplex}, A::CuPtr{cuComplex}, lda::Int64, B::CuPtr{cuComplex}, ldb::Int64, - beta::RefOrCuRef{Cfloat}, + beta::CuRef{Cfloat}, C::CuPtr{cuComplex}, ldc::Int64)::cublasStatus_t end @@ -2844,10 +2844,10 @@ end @gcsafe_ccall libcublas.cublasZher2k_v2_64(handle::cublasHandle_t, uplo::cublasFillMode_t, trans::cublasOperation_t, n::Int64, k::Int64, - alpha::RefOrCuRef{cuDoubleComplex}, + 
alpha::CuRef{cuDoubleComplex}, A::CuPtr{cuDoubleComplex}, lda::Int64, B::CuPtr{cuDoubleComplex}, ldb::Int64, - beta::RefOrCuRef{Cdouble}, + beta::CuRef{Cdouble}, C::CuPtr{cuDoubleComplex}, ldc::Int64)::cublasStatus_t end @@ -2858,9 +2858,9 @@ end @gcsafe_ccall libcublas.cublasSsymm_v2_64(handle::cublasHandle_t, side::cublasSideMode_t, uplo::cublasFillMode_t, m::Int64, n::Int64, - alpha::RefOrCuRef{Cfloat}, A::CuPtr{Cfloat}, + alpha::CuRef{Cfloat}, A::CuPtr{Cfloat}, lda::Int64, B::CuPtr{Cfloat}, ldb::Int64, - beta::RefOrCuRef{Cfloat}, C::CuPtr{Cfloat}, + beta::CuRef{Cfloat}, C::CuPtr{Cfloat}, ldc::Int64)::cublasStatus_t end @@ -2870,9 +2870,9 @@ end @gcsafe_ccall libcublas.cublasDsymm_v2_64(handle::cublasHandle_t, side::cublasSideMode_t, uplo::cublasFillMode_t, m::Int64, n::Int64, - alpha::RefOrCuRef{Cdouble}, A::CuPtr{Cdouble}, + alpha::CuRef{Cdouble}, A::CuPtr{Cdouble}, lda::Int64, B::CuPtr{Cdouble}, ldb::Int64, - beta::RefOrCuRef{Cdouble}, C::CuPtr{Cdouble}, + beta::CuRef{Cdouble}, C::CuPtr{Cdouble}, ldc::Int64)::cublasStatus_t end @@ -2882,10 +2882,10 @@ end @gcsafe_ccall libcublas.cublasCsymm_v2_64(handle::cublasHandle_t, side::cublasSideMode_t, uplo::cublasFillMode_t, m::Int64, n::Int64, - alpha::RefOrCuRef{cuComplex}, + alpha::CuRef{cuComplex}, A::CuPtr{cuComplex}, lda::Int64, B::CuPtr{cuComplex}, ldb::Int64, - beta::RefOrCuRef{cuComplex}, + beta::CuRef{cuComplex}, C::CuPtr{cuComplex}, ldc::Int64)::cublasStatus_t end @@ -2896,10 +2896,10 @@ end @gcsafe_ccall libcublas.cublasZsymm_v2_64(handle::cublasHandle_t, side::cublasSideMode_t, uplo::cublasFillMode_t, m::Int64, n::Int64, - alpha::RefOrCuRef{cuDoubleComplex}, + alpha::CuRef{cuDoubleComplex}, A::CuPtr{cuDoubleComplex}, lda::Int64, B::CuPtr{cuDoubleComplex}, ldb::Int64, - beta::RefOrCuRef{cuDoubleComplex}, + beta::CuRef{cuDoubleComplex}, C::CuPtr{cuDoubleComplex}, ldc::Int64)::cublasStatus_t end @@ -2910,10 +2910,10 @@ end @gcsafe_ccall libcublas.cublasChemm_v2_64(handle::cublasHandle_t, 
side::cublasSideMode_t, uplo::cublasFillMode_t, m::Int64, n::Int64, - alpha::RefOrCuRef{cuComplex}, + alpha::CuRef{cuComplex}, A::CuPtr{cuComplex}, lda::Int64, B::CuPtr{cuComplex}, ldb::Int64, - beta::RefOrCuRef{cuComplex}, + beta::CuRef{cuComplex}, C::CuPtr{cuComplex}, ldc::Int64)::cublasStatus_t end @@ -2924,10 +2924,10 @@ end @gcsafe_ccall libcublas.cublasZhemm_v2_64(handle::cublasHandle_t, side::cublasSideMode_t, uplo::cublasFillMode_t, m::Int64, n::Int64, - alpha::RefOrCuRef{cuDoubleComplex}, + alpha::CuRef{cuDoubleComplex}, A::CuPtr{cuDoubleComplex}, lda::Int64, B::CuPtr{cuDoubleComplex}, ldb::Int64, - beta::RefOrCuRef{cuDoubleComplex}, + beta::CuRef{cuDoubleComplex}, C::CuPtr{cuDoubleComplex}, ldc::Int64)::cublasStatus_t end @@ -2940,7 +2940,7 @@ end uplo::cublasFillMode_t, trans::cublasOperation_t, diag::cublasDiagType_t, m::Int64, n::Int64, - alpha::RefOrCuRef{Cfloat}, A::CuPtr{Cfloat}, + alpha::CuRef{Cfloat}, A::CuPtr{Cfloat}, lda::Int64, B::CuPtr{Cfloat}, ldb::Int64)::cublasStatus_t end @@ -2953,7 +2953,7 @@ end uplo::cublasFillMode_t, trans::cublasOperation_t, diag::cublasDiagType_t, m::Int64, n::Int64, - alpha::RefOrCuRef{Cdouble}, A::CuPtr{Cdouble}, + alpha::CuRef{Cdouble}, A::CuPtr{Cdouble}, lda::Int64, B::CuPtr{Cdouble}, ldb::Int64)::cublasStatus_t end @@ -2966,7 +2966,7 @@ end uplo::cublasFillMode_t, trans::cublasOperation_t, diag::cublasDiagType_t, m::Int64, n::Int64, - alpha::RefOrCuRef{cuComplex}, + alpha::CuRef{cuComplex}, A::CuPtr{cuComplex}, lda::Int64, B::CuPtr{cuComplex}, ldb::Int64)::cublasStatus_t @@ -2980,7 +2980,7 @@ end uplo::cublasFillMode_t, trans::cublasOperation_t, diag::cublasDiagType_t, m::Int64, n::Int64, - alpha::RefOrCuRef{cuDoubleComplex}, + alpha::CuRef{cuDoubleComplex}, A::CuPtr{cuDoubleComplex}, lda::Int64, B::CuPtr{cuDoubleComplex}, ldb::Int64)::cublasStatus_t @@ -2994,7 +2994,7 @@ end uplo::cublasFillMode_t, trans::cublasOperation_t, diag::cublasDiagType_t, m::Int64, n::Int64, - alpha::RefOrCuRef{Cfloat}, 
A::CuPtr{Cfloat}, + alpha::CuRef{Cfloat}, A::CuPtr{Cfloat}, lda::Int64, B::CuPtr{Cfloat}, ldb::Int64, C::CuPtr{Cfloat}, ldc::Int64)::cublasStatus_t end @@ -3007,7 +3007,7 @@ end uplo::cublasFillMode_t, trans::cublasOperation_t, diag::cublasDiagType_t, m::Int64, n::Int64, - alpha::RefOrCuRef{Cdouble}, A::CuPtr{Cdouble}, + alpha::CuRef{Cdouble}, A::CuPtr{Cdouble}, lda::Int64, B::CuPtr{Cdouble}, ldb::Int64, C::CuPtr{Cdouble}, ldc::Int64)::cublasStatus_t end @@ -3020,7 +3020,7 @@ end uplo::cublasFillMode_t, trans::cublasOperation_t, diag::cublasDiagType_t, m::Int64, n::Int64, - alpha::RefOrCuRef{cuComplex}, + alpha::CuRef{cuComplex}, A::CuPtr{cuComplex}, lda::Int64, B::CuPtr{cuComplex}, ldb::Int64, C::CuPtr{cuComplex}, @@ -3035,7 +3035,7 @@ end uplo::cublasFillMode_t, trans::cublasOperation_t, diag::cublasDiagType_t, m::Int64, n::Int64, - alpha::RefOrCuRef{cuDoubleComplex}, + alpha::CuRef{cuDoubleComplex}, A::CuPtr{cuDoubleComplex}, lda::Int64, B::CuPtr{cuDoubleComplex}, ldb::Int64, C::CuPtr{cuDoubleComplex}, @@ -3385,7 +3385,7 @@ end @checked function cublasScalEx(handle, n, alpha, alphaType, x, xType, incx, executionType) initialize_context() @gcsafe_ccall libcublas.cublasScalEx(handle::cublasHandle_t, n::Cint, - alpha::PtrOrCuPtr{Cvoid}, alphaType::cudaDataType, + alpha::CuPtr{Cvoid}, alphaType::cudaDataType, x::CuPtr{Cvoid}, xType::cudaDataType, incx::Cint, executionType::cudaDataType)::cublasStatus_t end @@ -3394,7 +3394,7 @@ end executionType) initialize_context() @gcsafe_ccall libcublas.cublasScalEx_64(handle::cublasHandle_t, n::Int64, - alpha::PtrOrCuPtr{Cvoid}, + alpha::CuPtr{Cvoid}, alphaType::cudaDataType, x::CuPtr{Cvoid}, xType::cudaDataType, incx::Int64, executionType::cudaDataType)::cublasStatus_t @@ -3404,7 +3404,7 @@ end executiontype) initialize_context() @gcsafe_ccall libcublas.cublasAxpyEx(handle::cublasHandle_t, n::Cint, - alpha::PtrOrCuPtr{Cvoid}, alphaType::cudaDataType, + alpha::CuPtr{Cvoid}, alphaType::cudaDataType, x::CuPtr{Cvoid}, 
xType::cudaDataType, incx::Cint, y::CuPtr{Cvoid}, yType::cudaDataType, incy::Cint, executiontype::cudaDataType)::cublasStatus_t @@ -3414,7 +3414,7 @@ end incy, executiontype) initialize_context() @gcsafe_ccall libcublas.cublasAxpyEx_64(handle::cublasHandle_t, n::Int64, - alpha::PtrOrCuPtr{Cvoid}, + alpha::CuPtr{Cvoid}, alphaType::cudaDataType, x::CuPtr{Cvoid}, xType::cudaDataType, incx::Int64, y::CuPtr{Cvoid}, yType::cudaDataType, @@ -3458,7 +3458,7 @@ end initialize_context() @gcsafe_ccall libcublas.cublasIamaxEx(handle::cublasHandle_t, n::Cint, x::CuPtr{Cvoid}, xType::cudaDataType, incx::Cint, - result::RefOrCuRef{Cint})::cublasStatus_t + result::CuRef{Cint})::cublasStatus_t end @checked function cublasIamaxEx_64(handle, n, x, xType, incx, result) @@ -3466,14 +3466,14 @@ end @gcsafe_ccall libcublas.cublasIamaxEx_64(handle::cublasHandle_t, n::Int64, x::CuPtr{Cvoid}, xType::cudaDataType, incx::Int64, - result::RefOrCuRef{Int64})::cublasStatus_t + result::CuRef{Int64})::cublasStatus_t end @checked function cublasIaminEx(handle, n, x, xType, incx, result) initialize_context() @gcsafe_ccall libcublas.cublasIaminEx(handle::cublasHandle_t, n::Cint, x::CuPtr{Cvoid}, xType::cudaDataType, incx::Cint, - result::RefOrCuRef{Cint})::cublasStatus_t + result::CuRef{Cint})::cublasStatus_t end @checked function cublasIaminEx_64(handle, n, x, xType, incx, result) @@ -3481,7 +3481,7 @@ end @gcsafe_ccall libcublas.cublasIaminEx_64(handle::cublasHandle_t, n::Int64, x::CuPtr{Cvoid}, xType::cudaDataType, incx::Int64, - result::RefOrCuRef{Int64})::cublasStatus_t + result::CuRef{Int64})::cublasStatus_t end @checked function cublasAsumEx(handle, n, x, xType, incx, result, resultType, executiontype) @@ -3574,10 +3574,10 @@ end initialize_context() @gcsafe_ccall libcublas.cublasSgemvBatched(handle::cublasHandle_t, trans::cublasOperation_t, m::Cint, n::Cint, - alpha::RefOrCuRef{Cfloat}, + alpha::CuRef{Cfloat}, Aarray::CuPtr{Ptr{Cfloat}}, lda::Cint, xarray::CuPtr{Ptr{Cfloat}}, incx::Cint, -
beta::RefOrCuRef{Cfloat}, + beta::CuRef{Cfloat}, yarray::CuPtr{Ptr{Cfloat}}, incy::Cint, batchCount::Cint)::cublasStatus_t end @@ -3587,10 +3587,10 @@ end initialize_context() @gcsafe_ccall libcublas.cublasSgemvBatched_64(handle::cublasHandle_t, trans::cublasOperation_t, m::Int64, - n::Int64, alpha::RefOrCuRef{Cfloat}, + n::Int64, alpha::CuRef{Cfloat}, Aarray::CuPtr{Ptr{Cfloat}}, lda::Int64, xarray::CuPtr{Ptr{Cfloat}}, incx::Int64, - beta::RefOrCuRef{Cfloat}, + beta::CuRef{Cfloat}, yarray::CuPtr{Ptr{Cfloat}}, incy::Int64, batchCount::Int64)::cublasStatus_t end @@ -3600,10 +3600,10 @@ end initialize_context() @gcsafe_ccall libcublas.cublasDgemvBatched(handle::cublasHandle_t, trans::cublasOperation_t, m::Cint, n::Cint, - alpha::RefOrCuRef{Cdouble}, + alpha::CuRef{Cdouble}, Aarray::CuPtr{Ptr{Cdouble}}, lda::Cint, xarray::CuPtr{Ptr{Cdouble}}, incx::Cint, - beta::RefOrCuRef{Cdouble}, + beta::CuRef{Cdouble}, yarray::CuPtr{Ptr{Cdouble}}, incy::Cint, batchCount::Cint)::cublasStatus_t end @@ -3613,10 +3613,10 @@ end initialize_context() @gcsafe_ccall libcublas.cublasDgemvBatched_64(handle::cublasHandle_t, trans::cublasOperation_t, m::Int64, - n::Int64, alpha::RefOrCuRef{Cdouble}, + n::Int64, alpha::CuRef{Cdouble}, Aarray::CuPtr{Ptr{Cdouble}}, lda::Int64, xarray::CuPtr{Ptr{Cdouble}}, incx::Int64, - beta::RefOrCuRef{Cdouble}, + beta::CuRef{Cdouble}, yarray::CuPtr{Ptr{Cdouble}}, incy::Int64, batchCount::Int64)::cublasStatus_t end @@ -3626,10 +3626,10 @@ end initialize_context() @gcsafe_ccall libcublas.cublasCgemvBatched(handle::cublasHandle_t, trans::cublasOperation_t, m::Cint, n::Cint, - alpha::RefOrCuRef{cuComplex}, + alpha::CuRef{cuComplex}, Aarray::CuPtr{Ptr{cuComplex}}, lda::Cint, xarray::CuPtr{Ptr{cuComplex}}, incx::Cint, - beta::RefOrCuRef{cuComplex}, + beta::CuRef{cuComplex}, yarray::CuPtr{Ptr{cuComplex}}, incy::Cint, batchCount::Cint)::cublasStatus_t end @@ -3639,10 +3639,10 @@ end initialize_context() @gcsafe_ccall 
libcublas.cublasCgemvBatched_64(handle::cublasHandle_t, trans::cublasOperation_t, m::Int64, - n::Int64, alpha::RefOrCuRef{cuComplex}, + n::Int64, alpha::CuRef{cuComplex}, Aarray::CuPtr{Ptr{cuComplex}}, lda::Int64, xarray::CuPtr{Ptr{cuComplex}}, - incx::Int64, beta::RefOrCuRef{cuComplex}, + incx::Int64, beta::CuRef{cuComplex}, yarray::CuPtr{Ptr{cuComplex}}, incy::Int64, batchCount::Int64)::cublasStatus_t @@ -3653,12 +3653,12 @@ end initialize_context() @gcsafe_ccall libcublas.cublasZgemvBatched(handle::cublasHandle_t, trans::cublasOperation_t, m::Cint, n::Cint, - alpha::RefOrCuRef{cuDoubleComplex}, + alpha::CuRef{cuDoubleComplex}, Aarray::CuPtr{Ptr{cuDoubleComplex}}, lda::Cint, xarray::CuPtr{Ptr{cuDoubleComplex}}, incx::Cint, - beta::RefOrCuRef{cuDoubleComplex}, + beta::CuRef{cuDoubleComplex}, yarray::CuPtr{Ptr{cuDoubleComplex}}, incy::Cint, batchCount::Cint)::cublasStatus_t end @@ -3669,12 +3669,12 @@ end @gcsafe_ccall libcublas.cublasZgemvBatched_64(handle::cublasHandle_t, trans::cublasOperation_t, m::Int64, n::Int64, - alpha::RefOrCuRef{cuDoubleComplex}, + alpha::CuRef{cuDoubleComplex}, Aarray::CuPtr{Ptr{cuDoubleComplex}}, lda::Int64, xarray::CuPtr{Ptr{cuDoubleComplex}}, incx::Int64, - beta::RefOrCuRef{cuDoubleComplex}, + beta::CuRef{cuDoubleComplex}, yarray::CuPtr{Ptr{cuDoubleComplex}}, incy::Int64, batchCount::Int64)::cublasStatus_t @@ -3686,11 +3686,11 @@ end initialize_context() @gcsafe_ccall libcublas.cublasSgemvStridedBatched(handle::cublasHandle_t, trans::cublasOperation_t, m::Cint, - n::Cint, alpha::RefOrCuRef{Cfloat}, + n::Cint, alpha::CuRef{Cfloat}, A::CuPtr{Cfloat}, lda::Cint, strideA::Clonglong, x::CuPtr{Cfloat}, incx::Cint, stridex::Clonglong, - beta::RefOrCuRef{Cfloat}, + beta::CuRef{Cfloat}, y::CuPtr{Cfloat}, incy::Cint, stridey::Clonglong, batchCount::Cint)::cublasStatus_t @@ -3703,12 +3703,12 @@ end @gcsafe_ccall libcublas.cublasSgemvStridedBatched_64(handle::cublasHandle_t, trans::cublasOperation_t, m::Int64, n::Int64, - 
alpha::RefOrCuRef{Cfloat}, + alpha::CuRef{Cfloat}, A::CuPtr{Cfloat}, lda::Int64, strideA::Clonglong, x::CuPtr{Cfloat}, incx::Int64, stridex::Clonglong, - beta::RefOrCuRef{Cfloat}, + beta::CuRef{Cfloat}, y::CuPtr{Cfloat}, incy::Int64, stridey::Clonglong, batchCount::Int64)::cublasStatus_t @@ -3720,11 +3720,11 @@ end initialize_context() @gcsafe_ccall libcublas.cublasDgemvStridedBatched(handle::cublasHandle_t, trans::cublasOperation_t, m::Cint, - n::Cint, alpha::RefOrCuRef{Cdouble}, + n::Cint, alpha::CuRef{Cdouble}, A::CuPtr{Cdouble}, lda::Cint, strideA::Clonglong, x::CuPtr{Cdouble}, incx::Cint, stridex::Clonglong, - beta::RefOrCuRef{Cdouble}, + beta::CuRef{Cdouble}, y::CuPtr{Cdouble}, incy::Cint, stridey::Clonglong, batchCount::Cint)::cublasStatus_t @@ -3737,12 +3737,12 @@ end @gcsafe_ccall libcublas.cublasDgemvStridedBatched_64(handle::cublasHandle_t, trans::cublasOperation_t, m::Int64, n::Int64, - alpha::RefOrCuRef{Cdouble}, + alpha::CuRef{Cdouble}, A::CuPtr{Cdouble}, lda::Int64, strideA::Clonglong, x::CuPtr{Cdouble}, incx::Int64, stridex::Clonglong, - beta::RefOrCuRef{Cdouble}, + beta::CuRef{Cdouble}, y::CuPtr{Cdouble}, incy::Int64, stridey::Clonglong, batchCount::Int64)::cublasStatus_t @@ -3754,12 +3754,12 @@ end initialize_context() @gcsafe_ccall libcublas.cublasCgemvStridedBatched(handle::cublasHandle_t, trans::cublasOperation_t, m::Cint, - n::Cint, alpha::RefOrCuRef{cuComplex}, + n::Cint, alpha::CuRef{cuComplex}, A::CuPtr{cuComplex}, lda::Cint, strideA::Clonglong, x::CuPtr{cuComplex}, incx::Cint, stridex::Clonglong, - beta::RefOrCuRef{cuComplex}, + beta::CuRef{cuComplex}, y::CuPtr{cuComplex}, incy::Cint, stridey::Clonglong, batchCount::Cint)::cublasStatus_t @@ -3772,12 +3772,12 @@ end @gcsafe_ccall libcublas.cublasCgemvStridedBatched_64(handle::cublasHandle_t, trans::cublasOperation_t, m::Int64, n::Int64, - alpha::RefOrCuRef{cuComplex}, + alpha::CuRef{cuComplex}, A::CuPtr{cuComplex}, lda::Int64, strideA::Clonglong, x::CuPtr{cuComplex}, incx::Int64, 
stridex::Clonglong, - beta::RefOrCuRef{cuComplex}, + beta::CuRef{cuComplex}, y::CuPtr{cuComplex}, incy::Int64, stridey::Clonglong, batchCount::Int64)::cublasStatus_t @@ -3790,12 +3790,12 @@ end @gcsafe_ccall libcublas.cublasZgemvStridedBatched(handle::cublasHandle_t, trans::cublasOperation_t, m::Cint, n::Cint, - alpha::RefOrCuRef{cuDoubleComplex}, + alpha::CuRef{cuDoubleComplex}, A::CuPtr{cuDoubleComplex}, lda::Cint, strideA::Clonglong, x::CuPtr{cuDoubleComplex}, incx::Cint, stridex::Clonglong, - beta::RefOrCuRef{cuDoubleComplex}, + beta::CuRef{cuDoubleComplex}, y::CuPtr{cuDoubleComplex}, incy::Cint, stridey::Clonglong, batchCount::Cint)::cublasStatus_t @@ -3808,12 +3808,12 @@ end @gcsafe_ccall libcublas.cublasZgemvStridedBatched_64(handle::cublasHandle_t, trans::cublasOperation_t, m::Int64, n::Int64, - alpha::RefOrCuRef{cuDoubleComplex}, + alpha::CuRef{cuDoubleComplex}, A::CuPtr{cuDoubleComplex}, lda::Int64, strideA::Clonglong, x::CuPtr{cuDoubleComplex}, incx::Int64, stridex::Clonglong, - beta::RefOrCuRef{cuDoubleComplex}, + beta::CuRef{cuDoubleComplex}, y::CuPtr{cuDoubleComplex}, incy::Int64, stridey::Clonglong, batchCount::Int64)::cublasStatus_t @@ -3824,10 +3824,10 @@ end initialize_context() @gcsafe_ccall libcublas.cublasCgemm3m(handle::cublasHandle_t, transa::cublasOperation_t, transb::cublasOperation_t, m::Cint, n::Cint, - k::Cint, alpha::RefOrCuRef{cuComplex}, + k::Cint, alpha::CuRef{cuComplex}, A::CuPtr{cuComplex}, lda::Cint, B::CuPtr{cuComplex}, ldb::Cint, - beta::RefOrCuRef{cuComplex}, C::CuPtr{cuComplex}, + beta::CuRef{cuComplex}, C::CuPtr{cuComplex}, ldc::Cint)::cublasStatus_t end @@ -3837,10 +3837,10 @@ end @gcsafe_ccall libcublas.cublasCgemm3m_64(handle::cublasHandle_t, transa::cublasOperation_t, transb::cublasOperation_t, m::Int64, n::Int64, - k::Int64, alpha::RefOrCuRef{cuComplex}, + k::Int64, alpha::CuRef{cuComplex}, A::CuPtr{cuComplex}, lda::Int64, B::CuPtr{cuComplex}, ldb::Int64, - beta::RefOrCuRef{cuComplex}, + beta::CuRef{cuComplex}, 
C::CuPtr{cuComplex}, ldc::Int64)::cublasStatus_t end @@ -3851,10 +3851,10 @@ end @gcsafe_ccall libcublas.cublasCgemm3mEx(handle::cublasHandle_t, transa::cublasOperation_t, transb::cublasOperation_t, m::Cint, n::Cint, - k::Cint, alpha::RefOrCuRef{cuComplex}, + k::Cint, alpha::CuRef{cuComplex}, A::CuPtr{Cvoid}, Atype::cudaDataType, lda::Cint, B::CuPtr{Cvoid}, Btype::cudaDataType, ldb::Cint, - beta::RefOrCuRef{cuComplex}, C::CuPtr{Cvoid}, + beta::CuRef{cuComplex}, C::CuPtr{Cvoid}, Ctype::cudaDataType, ldc::Cint)::cublasStatus_t end @@ -3865,11 +3865,11 @@ end transa::cublasOperation_t, transb::cublasOperation_t, m::Int64, n::Int64, k::Int64, - alpha::RefOrCuRef{cuComplex}, + alpha::CuRef{cuComplex}, A::CuPtr{Cvoid}, Atype::cudaDataType, lda::Int64, B::CuPtr{Cvoid}, Btype::cudaDataType, ldb::Int64, - beta::RefOrCuRef{cuComplex}, C::CuPtr{Cvoid}, + beta::CuRef{cuComplex}, C::CuPtr{Cvoid}, Ctype::cudaDataType, ldc::Int64)::cublasStatus_t end @@ -3879,10 +3879,10 @@ end initialize_context() @gcsafe_ccall libcublas.cublasZgemm3m(handle::cublasHandle_t, transa::cublasOperation_t, transb::cublasOperation_t, m::Cint, n::Cint, - k::Cint, alpha::RefOrCuRef{cuDoubleComplex}, + k::Cint, alpha::CuRef{cuDoubleComplex}, A::CuPtr{cuDoubleComplex}, lda::Cint, B::CuPtr{cuDoubleComplex}, ldb::Cint, - beta::RefOrCuRef{cuDoubleComplex}, + beta::CuRef{cuDoubleComplex}, C::CuPtr{cuDoubleComplex}, ldc::Cint)::cublasStatus_t end @@ -3893,10 +3893,10 @@ end @gcsafe_ccall libcublas.cublasZgemm3m_64(handle::cublasHandle_t, transa::cublasOperation_t, transb::cublasOperation_t, m::Int64, n::Int64, - k::Int64, alpha::RefOrCuRef{cuDoubleComplex}, + k::Int64, alpha::CuRef{cuDoubleComplex}, A::CuPtr{cuDoubleComplex}, lda::Int64, B::CuPtr{cuDoubleComplex}, ldb::Int64, - beta::RefOrCuRef{cuDoubleComplex}, + beta::CuRef{cuDoubleComplex}, C::CuPtr{cuDoubleComplex}, ldc::Int64)::cublasStatus_t end @@ -3906,10 +3906,10 @@ end initialize_context() @gcsafe_ccall libcublas.cublasSgemmEx(handle::cublasHandle_t, 
transa::cublasOperation_t, transb::cublasOperation_t, m::Cint, n::Cint, - k::Cint, alpha::RefOrCuRef{Cfloat}, + k::Cint, alpha::CuRef{Cfloat}, A::CuPtr{Cvoid}, Atype::cudaDataType, lda::Cint, B::CuPtr{Cvoid}, Btype::cudaDataType, ldb::Cint, - beta::RefOrCuRef{Cfloat}, C::CuPtr{Cvoid}, + beta::CuRef{Cfloat}, C::CuPtr{Cvoid}, Ctype::cudaDataType, ldc::Cint)::cublasStatus_t end @@ -3919,11 +3919,11 @@ end @gcsafe_ccall libcublas.cublasSgemmEx_64(handle::cublasHandle_t, transa::cublasOperation_t, transb::cublasOperation_t, m::Int64, n::Int64, - k::Int64, alpha::RefOrCuRef{Cfloat}, + k::Int64, alpha::CuRef{Cfloat}, A::CuPtr{Cvoid}, Atype::cudaDataType, lda::Int64, B::CuPtr{Cvoid}, Btype::cudaDataType, ldb::Int64, - beta::RefOrCuRef{Cfloat}, C::CuPtr{Cvoid}, + beta::CuRef{Cfloat}, C::CuPtr{Cvoid}, Ctype::cudaDataType, ldc::Int64)::cublasStatus_t end @@ -3963,10 +3963,10 @@ end initialize_context() @gcsafe_ccall libcublas.cublasCgemmEx(handle::cublasHandle_t, transa::cublasOperation_t, transb::cublasOperation_t, m::Cint, n::Cint, - k::Cint, alpha::RefOrCuRef{cuComplex}, + k::Cint, alpha::CuRef{cuComplex}, A::CuPtr{Cvoid}, Atype::cudaDataType, lda::Cint, B::CuPtr{Cvoid}, Btype::cudaDataType, ldb::Cint, - beta::RefOrCuRef{cuComplex}, C::CuPtr{Cvoid}, + beta::CuRef{cuComplex}, C::CuPtr{Cvoid}, Ctype::cudaDataType, ldc::Cint)::cublasStatus_t end @@ -3976,11 +3976,11 @@ end @gcsafe_ccall libcublas.cublasCgemmEx_64(handle::cublasHandle_t, transa::cublasOperation_t, transb::cublasOperation_t, m::Int64, n::Int64, - k::Int64, alpha::RefOrCuRef{cuComplex}, + k::Int64, alpha::CuRef{cuComplex}, A::CuPtr{Cvoid}, Atype::cudaDataType, lda::Int64, B::CuPtr{Cvoid}, Btype::cudaDataType, ldb::Int64, - beta::RefOrCuRef{cuComplex}, C::CuPtr{Cvoid}, + beta::CuRef{cuComplex}, C::CuPtr{Cvoid}, Ctype::cudaDataType, ldc::Int64)::cublasStatus_t end @@ -3990,9 +3990,9 @@ end initialize_context() @gcsafe_ccall libcublas.cublasCsyrkEx(handle::cublasHandle_t, uplo::cublasFillMode_t, 
trans::cublasOperation_t, n::Cint, k::Cint, - alpha::RefOrCuRef{cuComplex}, A::CuPtr{Cvoid}, + alpha::CuRef{cuComplex}, A::CuPtr{Cvoid}, Atype::cudaDataType, lda::Cint, - beta::RefOrCuRef{cuComplex}, C::CuPtr{Cvoid}, + beta::CuRef{cuComplex}, C::CuPtr{Cvoid}, Ctype::cudaDataType, ldc::Cint)::cublasStatus_t end @@ -4001,9 +4001,9 @@ end initialize_context() @gcsafe_ccall libcublas.cublasCsyrkEx_64(handle::cublasHandle_t, uplo::cublasFillMode_t, trans::cublasOperation_t, n::Int64, k::Int64, - alpha::RefOrCuRef{cuComplex}, A::CuPtr{Cvoid}, + alpha::CuRef{cuComplex}, A::CuPtr{Cvoid}, Atype::cudaDataType, lda::Int64, - beta::RefOrCuRef{cuComplex}, C::CuPtr{Cvoid}, + beta::CuRef{cuComplex}, C::CuPtr{Cvoid}, Ctype::cudaDataType, ldc::Int64)::cublasStatus_t end @@ -4013,9 +4013,9 @@ end initialize_context() @gcsafe_ccall libcublas.cublasCsyrk3mEx(handle::cublasHandle_t, uplo::cublasFillMode_t, trans::cublasOperation_t, n::Cint, k::Cint, - alpha::RefOrCuRef{cuComplex}, A::CuPtr{Cvoid}, + alpha::CuRef{cuComplex}, A::CuPtr{Cvoid}, Atype::cudaDataType, lda::Cint, - beta::RefOrCuRef{cuComplex}, C::CuPtr{Cvoid}, + beta::CuRef{cuComplex}, C::CuPtr{Cvoid}, Ctype::cudaDataType, ldc::Cint)::cublasStatus_t end @@ -4025,9 +4025,9 @@ end @gcsafe_ccall libcublas.cublasCsyrk3mEx_64(handle::cublasHandle_t, uplo::cublasFillMode_t, trans::cublasOperation_t, n::Int64, k::Int64, - alpha::RefOrCuRef{cuComplex}, + alpha::CuRef{cuComplex}, A::CuPtr{Cvoid}, Atype::cudaDataType, - lda::Int64, beta::RefOrCuRef{cuComplex}, + lda::Int64, beta::CuRef{cuComplex}, C::CuPtr{Cvoid}, Ctype::cudaDataType, ldc::Int64)::cublasStatus_t end @@ -4037,9 +4037,9 @@ end initialize_context() @gcsafe_ccall libcublas.cublasCherkEx(handle::cublasHandle_t, uplo::cublasFillMode_t, trans::cublasOperation_t, n::Cint, k::Cint, - alpha::RefOrCuRef{Cfloat}, A::CuPtr{Cvoid}, + alpha::CuRef{Cfloat}, A::CuPtr{Cvoid}, Atype::cudaDataType, lda::Cint, - beta::RefOrCuRef{Cfloat}, C::CuPtr{Cvoid}, + beta::CuRef{Cfloat}, 
C::CuPtr{Cvoid}, Ctype::cudaDataType, ldc::Cint)::cublasStatus_t end @@ -4048,9 +4048,9 @@ end initialize_context() @gcsafe_ccall libcublas.cublasCherkEx_64(handle::cublasHandle_t, uplo::cublasFillMode_t, trans::cublasOperation_t, n::Int64, k::Int64, - alpha::RefOrCuRef{Cfloat}, A::CuPtr{Cvoid}, + alpha::CuRef{Cfloat}, A::CuPtr{Cvoid}, Atype::cudaDataType, lda::Int64, - beta::RefOrCuRef{Cfloat}, C::CuPtr{Cvoid}, + beta::CuRef{Cfloat}, C::CuPtr{Cvoid}, Ctype::cudaDataType, ldc::Int64)::cublasStatus_t end @@ -4060,9 +4060,9 @@ end initialize_context() @gcsafe_ccall libcublas.cublasCherk3mEx(handle::cublasHandle_t, uplo::cublasFillMode_t, trans::cublasOperation_t, n::Cint, k::Cint, - alpha::RefOrCuRef{Cfloat}, A::CuPtr{Cvoid}, + alpha::CuRef{Cfloat}, A::CuPtr{Cvoid}, Atype::cudaDataType, lda::Cint, - beta::RefOrCuRef{Cfloat}, C::CuPtr{Cvoid}, + beta::CuRef{Cfloat}, C::CuPtr{Cvoid}, Ctype::cudaDataType, ldc::Cint)::cublasStatus_t end @@ -4072,9 +4072,9 @@ end @gcsafe_ccall libcublas.cublasCherk3mEx_64(handle::cublasHandle_t, uplo::cublasFillMode_t, trans::cublasOperation_t, n::Int64, k::Int64, - alpha::RefOrCuRef{Cfloat}, A::CuPtr{Cvoid}, + alpha::CuRef{Cfloat}, A::CuPtr{Cvoid}, Atype::cudaDataType, lda::Int64, - beta::RefOrCuRef{Cfloat}, C::CuPtr{Cvoid}, + beta::CuRef{Cfloat}, C::CuPtr{Cvoid}, Ctype::cudaDataType, ldc::Int64)::cublasStatus_t end @@ -4084,9 +4084,9 @@ end initialize_context() @gcsafe_ccall libcublas.cublasSsyrkx(handle::cublasHandle_t, uplo::cublasFillMode_t, trans::cublasOperation_t, n::Cint, k::Cint, - alpha::RefOrCuRef{Cfloat}, A::CuPtr{Cfloat}, + alpha::CuRef{Cfloat}, A::CuPtr{Cfloat}, lda::Cint, B::CuPtr{Cfloat}, ldb::Cint, - beta::RefOrCuRef{Cfloat}, C::CuPtr{Cfloat}, + beta::CuRef{Cfloat}, C::CuPtr{Cfloat}, ldc::Cint)::cublasStatus_t end @@ -4095,9 +4095,9 @@ end initialize_context() @gcsafe_ccall libcublas.cublasSsyrkx_64(handle::cublasHandle_t, uplo::cublasFillMode_t, trans::cublasOperation_t, n::Int64, k::Int64, - alpha::RefOrCuRef{Cfloat}, 
A::CuPtr{Cfloat}, + alpha::CuRef{Cfloat}, A::CuPtr{Cfloat}, lda::Int64, B::CuPtr{Cfloat}, ldb::Int64, - beta::RefOrCuRef{Cfloat}, C::CuPtr{Cfloat}, + beta::CuRef{Cfloat}, C::CuPtr{Cfloat}, ldc::Int64)::cublasStatus_t end @@ -4106,9 +4106,9 @@ end initialize_context() @gcsafe_ccall libcublas.cublasDsyrkx(handle::cublasHandle_t, uplo::cublasFillMode_t, trans::cublasOperation_t, n::Cint, k::Cint, - alpha::RefOrCuRef{Cdouble}, A::CuPtr{Cdouble}, + alpha::CuRef{Cdouble}, A::CuPtr{Cdouble}, lda::Cint, B::CuPtr{Cdouble}, ldb::Cint, - beta::RefOrCuRef{Cdouble}, C::CuPtr{Cdouble}, + beta::CuRef{Cdouble}, C::CuPtr{Cdouble}, ldc::Cint)::cublasStatus_t end @@ -4117,9 +4117,9 @@ end initialize_context() @gcsafe_ccall libcublas.cublasDsyrkx_64(handle::cublasHandle_t, uplo::cublasFillMode_t, trans::cublasOperation_t, n::Int64, k::Int64, - alpha::RefOrCuRef{Cdouble}, A::CuPtr{Cdouble}, + alpha::CuRef{Cdouble}, A::CuPtr{Cdouble}, lda::Int64, B::CuPtr{Cdouble}, ldb::Int64, - beta::RefOrCuRef{Cdouble}, C::CuPtr{Cdouble}, + beta::CuRef{Cdouble}, C::CuPtr{Cdouble}, ldc::Int64)::cublasStatus_t end @@ -4128,9 +4128,9 @@ end initialize_context() @gcsafe_ccall libcublas.cublasCsyrkx(handle::cublasHandle_t, uplo::cublasFillMode_t, trans::cublasOperation_t, n::Cint, k::Cint, - alpha::RefOrCuRef{cuComplex}, A::CuPtr{cuComplex}, + alpha::CuRef{cuComplex}, A::CuPtr{cuComplex}, lda::Cint, B::CuPtr{cuComplex}, ldb::Cint, - beta::RefOrCuRef{cuComplex}, C::CuPtr{cuComplex}, + beta::CuRef{cuComplex}, C::CuPtr{cuComplex}, ldc::Cint)::cublasStatus_t end @@ -4139,10 +4139,10 @@ end initialize_context() @gcsafe_ccall libcublas.cublasCsyrkx_64(handle::cublasHandle_t, uplo::cublasFillMode_t, trans::cublasOperation_t, n::Int64, k::Int64, - alpha::RefOrCuRef{cuComplex}, + alpha::CuRef{cuComplex}, A::CuPtr{cuComplex}, lda::Int64, B::CuPtr{cuComplex}, ldb::Int64, - beta::RefOrCuRef{cuComplex}, + beta::CuRef{cuComplex}, C::CuPtr{cuComplex}, ldc::Int64)::cublasStatus_t end @@ -4151,10 +4151,10 @@ end 
initialize_context() @gcsafe_ccall libcublas.cublasZsyrkx(handle::cublasHandle_t, uplo::cublasFillMode_t, trans::cublasOperation_t, n::Cint, k::Cint, - alpha::RefOrCuRef{cuDoubleComplex}, + alpha::CuRef{cuDoubleComplex}, A::CuPtr{cuDoubleComplex}, lda::Cint, B::CuPtr{cuDoubleComplex}, ldb::Cint, - beta::RefOrCuRef{cuDoubleComplex}, + beta::CuRef{cuDoubleComplex}, C::CuPtr{cuDoubleComplex}, ldc::Cint)::cublasStatus_t end @@ -4164,10 +4164,10 @@ end initialize_context() @gcsafe_ccall libcublas.cublasZsyrkx_64(handle::cublasHandle_t, uplo::cublasFillMode_t, trans::cublasOperation_t, n::Int64, k::Int64, - alpha::RefOrCuRef{cuDoubleComplex}, + alpha::CuRef{cuDoubleComplex}, A::CuPtr{cuDoubleComplex}, lda::Int64, B::CuPtr{cuDoubleComplex}, ldb::Int64, - beta::RefOrCuRef{cuDoubleComplex}, + beta::CuRef{cuDoubleComplex}, C::CuPtr{cuDoubleComplex}, ldc::Int64)::cublasStatus_t end @@ -4177,9 +4177,9 @@ end initialize_context() @gcsafe_ccall libcublas.cublasCherkx(handle::cublasHandle_t, uplo::cublasFillMode_t, trans::cublasOperation_t, n::Cint, k::Cint, - alpha::RefOrCuRef{cuComplex}, A::CuPtr{cuComplex}, + alpha::CuRef{cuComplex}, A::CuPtr{cuComplex}, lda::Cint, B::CuPtr{cuComplex}, ldb::Cint, - beta::RefOrCuRef{Cfloat}, C::CuPtr{cuComplex}, + beta::CuRef{Cfloat}, C::CuPtr{cuComplex}, ldc::Cint)::cublasStatus_t end @@ -4188,10 +4188,10 @@ end initialize_context() @gcsafe_ccall libcublas.cublasCherkx_64(handle::cublasHandle_t, uplo::cublasFillMode_t, trans::cublasOperation_t, n::Int64, k::Int64, - alpha::RefOrCuRef{cuComplex}, + alpha::CuRef{cuComplex}, A::CuPtr{cuComplex}, lda::Int64, B::CuPtr{cuComplex}, ldb::Int64, - beta::RefOrCuRef{Cfloat}, C::CuPtr{cuComplex}, + beta::CuRef{Cfloat}, C::CuPtr{cuComplex}, ldc::Int64)::cublasStatus_t end @@ -4200,10 +4200,10 @@ end initialize_context() @gcsafe_ccall libcublas.cublasZherkx(handle::cublasHandle_t, uplo::cublasFillMode_t, trans::cublasOperation_t, n::Cint, k::Cint, - alpha::RefOrCuRef{cuDoubleComplex}, + 
alpha::CuRef{cuDoubleComplex}, A::CuPtr{cuDoubleComplex}, lda::Cint, B::CuPtr{cuDoubleComplex}, ldb::Cint, - beta::RefOrCuRef{Cdouble}, + beta::CuRef{Cdouble}, C::CuPtr{cuDoubleComplex}, ldc::Cint)::cublasStatus_t end @@ -4213,10 +4213,10 @@ end initialize_context() @gcsafe_ccall libcublas.cublasZherkx_64(handle::cublasHandle_t, uplo::cublasFillMode_t, trans::cublasOperation_t, n::Int64, k::Int64, - alpha::RefOrCuRef{cuDoubleComplex}, + alpha::CuRef{cuDoubleComplex}, A::CuPtr{cuDoubleComplex}, lda::Int64, B::CuPtr{cuDoubleComplex}, ldb::Int64, - beta::RefOrCuRef{Cdouble}, + beta::CuRef{Cdouble}, C::CuPtr{cuDoubleComplex}, ldc::Int64)::cublasStatus_t end @@ -4227,10 +4227,10 @@ end @gcsafe_ccall libcublas.cublasSgemmBatched(handle::cublasHandle_t, transa::cublasOperation_t, transb::cublasOperation_t, m::Cint, n::Cint, - k::Cint, alpha::RefOrCuRef{Cfloat}, + k::Cint, alpha::CuRef{Cfloat}, Aarray::CuPtr{Ptr{Cfloat}}, lda::Cint, Barray::CuPtr{Ptr{Cfloat}}, ldb::Cint, - beta::RefOrCuRef{Cfloat}, + beta::CuRef{Cfloat}, Carray::CuPtr{Ptr{Cfloat}}, ldc::Cint, batchCount::Cint)::cublasStatus_t end @@ -4242,10 +4242,10 @@ end transa::cublasOperation_t, transb::cublasOperation_t, m::Int64, n::Int64, k::Int64, - alpha::RefOrCuRef{Cfloat}, + alpha::CuRef{Cfloat}, Aarray::CuPtr{Ptr{Cfloat}}, lda::Int64, Barray::CuPtr{Ptr{Cfloat}}, ldb::Int64, - beta::RefOrCuRef{Cfloat}, + beta::CuRef{Cfloat}, Carray::CuPtr{Ptr{Cfloat}}, ldc::Int64, batchCount::Int64)::cublasStatus_t end @@ -4256,10 +4256,10 @@ end @gcsafe_ccall libcublas.cublasDgemmBatched(handle::cublasHandle_t, transa::cublasOperation_t, transb::cublasOperation_t, m::Cint, n::Cint, - k::Cint, alpha::RefOrCuRef{Cdouble}, + k::Cint, alpha::CuRef{Cdouble}, Aarray::CuPtr{Ptr{Cdouble}}, lda::Cint, Barray::CuPtr{Ptr{Cdouble}}, ldb::Cint, - beta::RefOrCuRef{Cdouble}, + beta::CuRef{Cdouble}, Carray::CuPtr{Ptr{Cdouble}}, ldc::Cint, batchCount::Cint)::cublasStatus_t end @@ -4271,10 +4271,10 @@ end transa::cublasOperation_t, 
transb::cublasOperation_t, m::Int64, n::Int64, k::Int64, - alpha::RefOrCuRef{Cdouble}, + alpha::CuRef{Cdouble}, Aarray::CuPtr{Ptr{Cdouble}}, lda::Int64, Barray::CuPtr{Ptr{Cdouble}}, ldb::Int64, - beta::RefOrCuRef{Cdouble}, + beta::CuRef{Cdouble}, Carray::CuPtr{Ptr{Cdouble}}, ldc::Int64, batchCount::Int64)::cublasStatus_t end @@ -4285,10 +4285,10 @@ end @gcsafe_ccall libcublas.cublasCgemmBatched(handle::cublasHandle_t, transa::cublasOperation_t, transb::cublasOperation_t, m::Cint, n::Cint, - k::Cint, alpha::RefOrCuRef{cuComplex}, + k::Cint, alpha::CuRef{cuComplex}, Aarray::CuPtr{Ptr{cuComplex}}, lda::Cint, Barray::CuPtr{Ptr{cuComplex}}, ldb::Cint, - beta::RefOrCuRef{cuComplex}, + beta::CuRef{cuComplex}, Carray::CuPtr{Ptr{cuComplex}}, ldc::Cint, batchCount::Cint)::cublasStatus_t end @@ -4300,10 +4300,10 @@ end transa::cublasOperation_t, transb::cublasOperation_t, m::Int64, n::Int64, k::Int64, - alpha::RefOrCuRef{cuComplex}, + alpha::CuRef{cuComplex}, Aarray::CuPtr{Ptr{cuComplex}}, lda::Int64, Barray::CuPtr{Ptr{cuComplex}}, ldb::Int64, - beta::RefOrCuRef{cuComplex}, + beta::CuRef{cuComplex}, Carray::CuPtr{Ptr{cuComplex}}, ldc::Int64, batchCount::Int64)::cublasStatus_t end @@ -4315,10 +4315,10 @@ end transa::cublasOperation_t, transb::cublasOperation_t, m::Cint, n::Cint, k::Cint, - alpha::RefOrCuRef{cuComplex}, + alpha::CuRef{cuComplex}, Aarray::CuPtr{Ptr{cuComplex}}, lda::Cint, Barray::CuPtr{Ptr{cuComplex}}, ldb::Cint, - beta::RefOrCuRef{cuComplex}, + beta::CuRef{cuComplex}, Carray::CuPtr{Ptr{cuComplex}}, ldc::Cint, batchCount::Cint)::cublasStatus_t end @@ -4330,11 +4330,11 @@ end transa::cublasOperation_t, transb::cublasOperation_t, m::Int64, n::Int64, k::Int64, - alpha::RefOrCuRef{cuComplex}, + alpha::CuRef{cuComplex}, Aarray::CuPtr{Ptr{cuComplex}}, lda::Int64, Barray::CuPtr{Ptr{cuComplex}}, - ldb::Int64, beta::RefOrCuRef{cuComplex}, + ldb::Int64, beta::CuRef{cuComplex}, Carray::CuPtr{Ptr{cuComplex}}, ldc::Int64, batchCount::Int64)::cublasStatus_t @@ -4346,11 
+4346,11 @@ end @gcsafe_ccall libcublas.cublasZgemmBatched(handle::cublasHandle_t, transa::cublasOperation_t, transb::cublasOperation_t, m::Cint, n::Cint, - k::Cint, alpha::RefOrCuRef{cuDoubleComplex}, + k::Cint, alpha::CuRef{cuDoubleComplex}, Aarray::CuPtr{Ptr{cuDoubleComplex}}, lda::Cint, Barray::CuPtr{Ptr{cuDoubleComplex}}, - ldb::Cint, beta::RefOrCuRef{cuDoubleComplex}, + ldb::Cint, beta::CuRef{cuDoubleComplex}, Carray::CuPtr{Ptr{cuDoubleComplex}}, ldc::Cint, batchCount::Cint)::cublasStatus_t end @@ -4362,12 +4362,12 @@ end transa::cublasOperation_t, transb::cublasOperation_t, m::Int64, n::Int64, k::Int64, - alpha::RefOrCuRef{cuDoubleComplex}, + alpha::CuRef{cuDoubleComplex}, Aarray::CuPtr{Ptr{cuDoubleComplex}}, lda::Int64, Barray::CuPtr{Ptr{cuDoubleComplex}}, ldb::Int64, - beta::RefOrCuRef{cuDoubleComplex}, + beta::CuRef{cuDoubleComplex}, Carray::CuPtr{Ptr{cuDoubleComplex}}, ldc::Int64, batchCount::Int64)::cublasStatus_t @@ -4381,11 +4381,11 @@ end transa::cublasOperation_t, transb::cublasOperation_t, m::Cint, n::Cint, k::Cint, - alpha::RefOrCuRef{Cfloat}, + alpha::CuRef{Cfloat}, A::CuPtr{Cfloat}, lda::Cint, strideA::Clonglong, B::CuPtr{Cfloat}, ldb::Cint, strideB::Clonglong, - beta::RefOrCuRef{Cfloat}, + beta::CuRef{Cfloat}, C::CuPtr{Cfloat}, ldc::Cint, strideC::Clonglong, batchCount::Cint)::cublasStatus_t @@ -4399,12 +4399,12 @@ end transa::cublasOperation_t, transb::cublasOperation_t, m::Int64, n::Int64, k::Int64, - alpha::RefOrCuRef{Cfloat}, + alpha::CuRef{Cfloat}, A::CuPtr{Cfloat}, lda::Int64, strideA::Clonglong, B::CuPtr{Cfloat}, ldb::Int64, strideB::Clonglong, - beta::RefOrCuRef{Cfloat}, + beta::CuRef{Cfloat}, C::CuPtr{Cfloat}, ldc::Int64, strideC::Clonglong, batchCount::Int64)::cublasStatus_t @@ -4418,11 +4418,11 @@ end transa::cublasOperation_t, transb::cublasOperation_t, m::Cint, n::Cint, k::Cint, - alpha::RefOrCuRef{Cdouble}, + alpha::CuRef{Cdouble}, A::CuPtr{Cdouble}, lda::Cint, strideA::Clonglong, B::CuPtr{Cdouble}, ldb::Cint, strideB::Clonglong, 
- beta::RefOrCuRef{Cdouble}, + beta::CuRef{Cdouble}, C::CuPtr{Cdouble}, ldc::Cint, strideC::Clonglong, batchCount::Cint)::cublasStatus_t @@ -4436,12 +4436,12 @@ end transa::cublasOperation_t, transb::cublasOperation_t, m::Int64, n::Int64, k::Int64, - alpha::RefOrCuRef{Cdouble}, + alpha::CuRef{Cdouble}, A::CuPtr{Cdouble}, lda::Int64, strideA::Clonglong, B::CuPtr{Cdouble}, ldb::Int64, strideB::Clonglong, - beta::RefOrCuRef{Cdouble}, + beta::CuRef{Cdouble}, C::CuPtr{Cdouble}, ldc::Int64, strideC::Clonglong, batchCount::Int64)::cublasStatus_t @@ -4455,12 +4455,12 @@ end transa::cublasOperation_t, transb::cublasOperation_t, m::Cint, n::Cint, k::Cint, - alpha::RefOrCuRef{cuComplex}, + alpha::CuRef{cuComplex}, A::CuPtr{cuComplex}, lda::Cint, strideA::Clonglong, B::CuPtr{cuComplex}, ldb::Cint, strideB::Clonglong, - beta::RefOrCuRef{cuComplex}, + beta::CuRef{cuComplex}, C::CuPtr{cuComplex}, ldc::Cint, strideC::Clonglong, batchCount::Cint)::cublasStatus_t @@ -4474,12 +4474,12 @@ end transa::cublasOperation_t, transb::cublasOperation_t, m::Int64, n::Int64, k::Int64, - alpha::RefOrCuRef{cuComplex}, + alpha::CuRef{cuComplex}, A::CuPtr{cuComplex}, lda::Int64, strideA::Clonglong, B::CuPtr{cuComplex}, ldb::Int64, strideB::Clonglong, - beta::RefOrCuRef{cuComplex}, + beta::CuRef{cuComplex}, C::CuPtr{cuComplex}, ldc::Int64, strideC::Clonglong, batchCount::Int64)::cublasStatus_t @@ -4493,12 +4493,12 @@ end transa::cublasOperation_t, transb::cublasOperation_t, m::Cint, n::Cint, k::Cint, - alpha::RefOrCuRef{cuComplex}, + alpha::CuRef{cuComplex}, A::CuPtr{cuComplex}, lda::Cint, strideA::Clonglong, B::CuPtr{cuComplex}, ldb::Cint, strideB::Clonglong, - beta::RefOrCuRef{cuComplex}, + beta::CuRef{cuComplex}, C::CuPtr{cuComplex}, ldc::Cint, strideC::Clonglong, batchCount::Cint)::cublasStatus_t @@ -4512,12 +4512,12 @@ end transa::cublasOperation_t, transb::cublasOperation_t, m::Int64, n::Int64, k::Int64, - alpha::RefOrCuRef{cuComplex}, + alpha::CuRef{cuComplex}, A::CuPtr{cuComplex}, 
lda::Int64, strideA::Clonglong, B::CuPtr{cuComplex}, ldb::Int64, strideB::Clonglong, - beta::RefOrCuRef{cuComplex}, + beta::CuRef{cuComplex}, C::CuPtr{cuComplex}, ldc::Int64, strideC::Clonglong, batchCount::Int64)::cublasStatus_t @@ -4531,12 +4531,12 @@ end transa::cublasOperation_t, transb::cublasOperation_t, m::Cint, n::Cint, k::Cint, - alpha::RefOrCuRef{cuDoubleComplex}, + alpha::CuRef{cuDoubleComplex}, A::CuPtr{cuDoubleComplex}, lda::Cint, strideA::Clonglong, B::CuPtr{cuDoubleComplex}, ldb::Cint, strideB::Clonglong, - beta::RefOrCuRef{cuDoubleComplex}, + beta::CuRef{cuDoubleComplex}, C::CuPtr{cuDoubleComplex}, ldc::Cint, strideC::Clonglong, batchCount::Cint)::cublasStatus_t @@ -4550,12 +4550,12 @@ end transa::cublasOperation_t, transb::cublasOperation_t, m::Int64, n::Int64, k::Int64, - alpha::RefOrCuRef{cuDoubleComplex}, + alpha::CuRef{cuDoubleComplex}, A::CuPtr{cuDoubleComplex}, lda::Int64, strideA::Clonglong, B::CuPtr{cuDoubleComplex}, ldb::Int64, strideB::Clonglong, - beta::RefOrCuRef{cuDoubleComplex}, + beta::CuRef{cuDoubleComplex}, C::CuPtr{cuDoubleComplex}, ldc::Int64, strideC::Clonglong, batchCount::Int64)::cublasStatus_t @@ -4568,12 +4568,12 @@ end @gcsafe_ccall libcublas.cublasGemmBatchedEx(handle::cublasHandle_t, transa::cublasOperation_t, transb::cublasOperation_t, m::Cint, n::Cint, - k::Cint, alpha::PtrOrCuPtr{Cvoid}, + k::Cint, alpha::CuPtr{Cvoid}, Aarray::CuPtr{Ptr{Cvoid}}, Atype::cudaDataType, lda::Cint, Barray::CuPtr{Ptr{Cvoid}}, Btype::cudaDataType, ldb::Cint, - beta::PtrOrCuPtr{Cvoid}, + beta::CuPtr{Cvoid}, Carray::CuPtr{Ptr{Cvoid}}, Ctype::cudaDataType, ldc::Cint, batchCount::Cint, @@ -4589,12 +4589,12 @@ end transa::cublasOperation_t, transb::cublasOperation_t, m::Int64, n::Int64, k::Int64, - alpha::PtrOrCuPtr{Cvoid}, + alpha::CuRef{Cvoid}, Aarray::CuPtr{Ptr{Cvoid}}, Atype::cudaDataType, lda::Int64, Barray::CuPtr{Ptr{Cvoid}}, Btype::cudaDataType, ldb::Int64, - beta::PtrOrCuPtr{Cvoid}, + beta::CuRef{Cvoid}, Carray::CuPtr{Ptr{Cvoid}}, 
Ctype::cudaDataType, ldc::Int64, batchCount::Int64, @@ -4611,12 +4611,12 @@ end transa::cublasOperation_t, transb::cublasOperation_t, m::Cint, n::Cint, k::Cint, - alpha::PtrOrCuPtr{Cvoid}, + alpha::CuPtr{Cvoid}, A::CuPtr{Cvoid}, Atype::cudaDataType, lda::Cint, strideA::Clonglong, B::CuPtr{Cvoid}, Btype::cudaDataType, ldb::Cint, strideB::Clonglong, - beta::PtrOrCuPtr{Cvoid}, + beta::CuPtr{Cvoid}, C::CuPtr{Cvoid}, Ctype::cudaDataType, ldc::Cint, strideC::Clonglong, batchCount::Cint, @@ -4633,7 +4633,7 @@ end transa::cublasOperation_t, transb::cublasOperation_t, m::Int64, n::Int64, k::Int64, - alpha::PtrOrCuPtr{Cvoid}, + alpha::CuRef{Cvoid}, A::CuPtr{Cvoid}, Atype::cudaDataType, lda::Int64, strideA::Clonglong, @@ -4641,7 +4641,7 @@ end Btype::cudaDataType, ldb::Int64, strideB::Clonglong, - beta::PtrOrCuPtr{Cvoid}, + beta::CuRef{Cvoid}, C::CuPtr{Cvoid}, Ctype::cudaDataType, ldc::Int64, strideC::Clonglong, batchCount::Int64, @@ -4802,8 +4802,8 @@ end initialize_context() @gcsafe_ccall libcublas.cublasSgeam(handle::cublasHandle_t, transa::cublasOperation_t, transb::cublasOperation_t, m::Cint, n::Cint, - alpha::RefOrCuRef{Cfloat}, A::CuPtr{Cfloat}, - lda::Cint, beta::RefOrCuRef{Cfloat}, + alpha::CuRef{Cfloat}, A::CuPtr{Cfloat}, + lda::Cint, beta::CuRef{Cfloat}, B::CuPtr{Cfloat}, ldb::Cint, C::CuPtr{Cfloat}, ldc::Cint)::cublasStatus_t end @@ -4814,8 +4814,8 @@ end @gcsafe_ccall libcublas.cublasSgeam_64(handle::cublasHandle_t, transa::cublasOperation_t, transb::cublasOperation_t, m::Int64, n::Int64, - alpha::RefOrCuRef{Cfloat}, A::CuPtr{Cfloat}, - lda::Int64, beta::RefOrCuRef{Cfloat}, + alpha::CuRef{Cfloat}, A::CuPtr{Cfloat}, + lda::Int64, beta::CuRef{Cfloat}, B::CuPtr{Cfloat}, ldb::Int64, C::CuPtr{Cfloat}, ldc::Int64)::cublasStatus_t end @@ -4825,8 +4825,8 @@ end initialize_context() @gcsafe_ccall libcublas.cublasDgeam(handle::cublasHandle_t, transa::cublasOperation_t, transb::cublasOperation_t, m::Cint, n::Cint, - alpha::RefOrCuRef{Cdouble}, A::CuPtr{Cdouble}, - lda::Cint, 
beta::RefOrCuRef{Cdouble}, + alpha::CuRef{Cdouble}, A::CuPtr{Cdouble}, + lda::Cint, beta::CuRef{Cdouble}, B::CuPtr{Cdouble}, ldb::Cint, C::CuPtr{Cdouble}, ldc::Cint)::cublasStatus_t end @@ -4837,8 +4837,8 @@ end @gcsafe_ccall libcublas.cublasDgeam_64(handle::cublasHandle_t, transa::cublasOperation_t, transb::cublasOperation_t, m::Int64, n::Int64, - alpha::RefOrCuRef{Cdouble}, A::CuPtr{Cdouble}, - lda::Int64, beta::RefOrCuRef{Cdouble}, + alpha::CuRef{Cdouble}, A::CuPtr{Cdouble}, + lda::Int64, beta::CuRef{Cdouble}, B::CuPtr{Cdouble}, ldb::Int64, C::CuPtr{Cdouble}, ldc::Int64)::cublasStatus_t end @@ -4848,8 +4848,8 @@ end initialize_context() @gcsafe_ccall libcublas.cublasCgeam(handle::cublasHandle_t, transa::cublasOperation_t, transb::cublasOperation_t, m::Cint, n::Cint, - alpha::RefOrCuRef{cuComplex}, A::CuPtr{cuComplex}, - lda::Cint, beta::RefOrCuRef{cuComplex}, + alpha::CuRef{cuComplex}, A::CuPtr{cuComplex}, + lda::Cint, beta::CuRef{cuComplex}, B::CuPtr{cuComplex}, ldb::Cint, C::CuPtr{cuComplex}, ldc::Cint)::cublasStatus_t end @@ -4860,9 +4860,9 @@ end @gcsafe_ccall libcublas.cublasCgeam_64(handle::cublasHandle_t, transa::cublasOperation_t, transb::cublasOperation_t, m::Int64, n::Int64, - alpha::RefOrCuRef{cuComplex}, + alpha::CuRef{cuComplex}, A::CuPtr{cuComplex}, lda::Int64, - beta::RefOrCuRef{cuComplex}, B::CuPtr{cuComplex}, + beta::CuRef{cuComplex}, B::CuPtr{cuComplex}, ldb::Int64, C::CuPtr{cuComplex}, ldc::Int64)::cublasStatus_t end @@ -4872,9 +4872,9 @@ end initialize_context() @gcsafe_ccall libcublas.cublasZgeam(handle::cublasHandle_t, transa::cublasOperation_t, transb::cublasOperation_t, m::Cint, n::Cint, - alpha::RefOrCuRef{cuDoubleComplex}, + alpha::CuRef{cuDoubleComplex}, A::CuPtr{cuDoubleComplex}, lda::Cint, - beta::RefOrCuRef{cuDoubleComplex}, + beta::CuRef{cuDoubleComplex}, B::CuPtr{cuDoubleComplex}, ldb::Cint, C::CuPtr{cuDoubleComplex}, ldc::Cint)::cublasStatus_t @@ -4886,9 +4886,9 @@ end @gcsafe_ccall 
libcublas.cublasZgeam_64(handle::cublasHandle_t, transa::cublasOperation_t, transb::cublasOperation_t, m::Int64, n::Int64, - alpha::RefOrCuRef{cuDoubleComplex}, + alpha::CuRef{cuDoubleComplex}, A::CuPtr{cuDoubleComplex}, lda::Int64, - beta::RefOrCuRef{cuDoubleComplex}, + beta::CuRef{cuDoubleComplex}, B::CuPtr{cuDoubleComplex}, ldb::Int64, C::CuPtr{cuDoubleComplex}, ldc::Int64)::cublasStatus_t @@ -4902,7 +4902,7 @@ end uplo::cublasFillMode_t, trans::cublasOperation_t, diag::cublasDiagType_t, m::Cint, n::Cint, - alpha::RefOrCuRef{Cfloat}, + alpha::CuRef{Cfloat}, A::CuPtr{Ptr{Cfloat}}, lda::Cint, B::CuPtr{Ptr{Cfloat}}, ldb::Cint, batchCount::Cint)::cublasStatus_t @@ -4916,7 +4916,7 @@ end uplo::cublasFillMode_t, trans::cublasOperation_t, diag::cublasDiagType_t, m::Int64, - n::Int64, alpha::RefOrCuRef{Cfloat}, + n::Int64, alpha::CuRef{Cfloat}, A::CuPtr{Ptr{Cfloat}}, lda::Int64, B::CuPtr{Ptr{Cfloat}}, ldb::Int64, batchCount::Int64)::cublasStatus_t @@ -4930,7 +4930,7 @@ end uplo::cublasFillMode_t, trans::cublasOperation_t, diag::cublasDiagType_t, m::Cint, n::Cint, - alpha::RefOrCuRef{Cdouble}, + alpha::CuRef{Cdouble}, A::CuPtr{Ptr{Cdouble}}, lda::Cint, B::CuPtr{Ptr{Cdouble}}, ldb::Cint, batchCount::Cint)::cublasStatus_t @@ -4944,7 +4944,7 @@ end uplo::cublasFillMode_t, trans::cublasOperation_t, diag::cublasDiagType_t, m::Int64, - n::Int64, alpha::RefOrCuRef{Cdouble}, + n::Int64, alpha::CuRef{Cdouble}, A::CuPtr{Ptr{Cdouble}}, lda::Int64, B::CuPtr{Ptr{Cdouble}}, ldb::Int64, batchCount::Int64)::cublasStatus_t @@ -4958,7 +4958,7 @@ end uplo::cublasFillMode_t, trans::cublasOperation_t, diag::cublasDiagType_t, m::Cint, n::Cint, - alpha::RefOrCuRef{cuComplex}, + alpha::CuRef{cuComplex}, A::CuPtr{Ptr{cuComplex}}, lda::Cint, B::CuPtr{Ptr{cuComplex}}, ldb::Cint, batchCount::Cint)::cublasStatus_t @@ -4972,7 +4972,7 @@ end uplo::cublasFillMode_t, trans::cublasOperation_t, diag::cublasDiagType_t, m::Int64, - n::Int64, alpha::RefOrCuRef{cuComplex}, + n::Int64, alpha::CuRef{cuComplex}, 
A::CuPtr{Ptr{cuComplex}}, lda::Int64, B::CuPtr{Ptr{cuComplex}}, ldb::Int64, batchCount::Int64)::cublasStatus_t @@ -4986,7 +4986,7 @@ end uplo::cublasFillMode_t, trans::cublasOperation_t, diag::cublasDiagType_t, m::Cint, n::Cint, - alpha::RefOrCuRef{cuDoubleComplex}, + alpha::CuRef{cuDoubleComplex}, A::CuPtr{Ptr{cuDoubleComplex}}, lda::Cint, B::CuPtr{Ptr{cuDoubleComplex}}, ldb::Cint, batchCount::Cint)::cublasStatus_t @@ -5001,7 +5001,7 @@ end trans::cublasOperation_t, diag::cublasDiagType_t, m::Int64, n::Int64, - alpha::RefOrCuRef{cuDoubleComplex}, + alpha::CuRef{cuDoubleComplex}, A::CuPtr{Ptr{cuDoubleComplex}}, lda::Int64, B::CuPtr{Ptr{cuDoubleComplex}}, @@ -5492,10 +5492,10 @@ end @gcsafe_ccall libcublas.cublasXtSgemm(handle::cublasXtHandle_t, transa::cublasOperation_t, transb::cublasOperation_t, m::Csize_t, n::Csize_t, - k::Csize_t, alpha::RefOrCuRef{Cfloat}, + k::Csize_t, alpha::Ref{Cfloat}, A::PtrOrCuPtr{Cfloat}, lda::Csize_t, B::PtrOrCuPtr{Cfloat}, ldb::Csize_t, - beta::RefOrCuRef{Cfloat}, C::PtrOrCuPtr{Cfloat}, + beta::Ref{Cfloat}, C::PtrOrCuPtr{Cfloat}, ldc::Csize_t)::cublasStatus_t end @@ -5505,10 +5505,10 @@ end @gcsafe_ccall libcublas.cublasXtDgemm(handle::cublasXtHandle_t, transa::cublasOperation_t, transb::cublasOperation_t, m::Csize_t, n::Csize_t, - k::Csize_t, alpha::RefOrCuRef{Cdouble}, + k::Csize_t, alpha::Ref{Cdouble}, A::PtrOrCuPtr{Cdouble}, lda::Csize_t, B::PtrOrCuPtr{Cdouble}, ldb::Csize_t, - beta::RefOrCuRef{Cdouble}, C::PtrOrCuPtr{Cdouble}, + beta::Ref{Cdouble}, C::PtrOrCuPtr{Cdouble}, ldc::Csize_t)::cublasStatus_t end @@ -5518,10 +5518,10 @@ end @gcsafe_ccall libcublas.cublasXtCgemm(handle::cublasXtHandle_t, transa::cublasOperation_t, transb::cublasOperation_t, m::Csize_t, n::Csize_t, - k::Csize_t, alpha::RefOrCuRef{cuComplex}, + k::Csize_t, alpha::Ref{cuComplex}, A::PtrOrCuPtr{cuComplex}, lda::Csize_t, B::PtrOrCuPtr{cuComplex}, ldb::Csize_t, - beta::RefOrCuRef{cuComplex}, + beta::Ref{cuComplex}, C::PtrOrCuPtr{cuComplex}, 
ldc::Csize_t)::cublasStatus_t end @@ -5532,10 +5532,10 @@ end @gcsafe_ccall libcublas.cublasXtZgemm(handle::cublasXtHandle_t, transa::cublasOperation_t, transb::cublasOperation_t, m::Csize_t, n::Csize_t, - k::Csize_t, alpha::RefOrCuRef{cuDoubleComplex}, + k::Csize_t, alpha::Ref{cuDoubleComplex}, A::PtrOrCuPtr{cuDoubleComplex}, lda::Csize_t, B::PtrOrCuPtr{cuDoubleComplex}, ldb::Csize_t, - beta::RefOrCuRef{cuDoubleComplex}, + beta::Ref{cuDoubleComplex}, C::PtrOrCuPtr{cuDoubleComplex}, ldc::Csize_t)::cublasStatus_t end @@ -5544,8 +5544,8 @@ end initialize_context() @gcsafe_ccall libcublas.cublasXtSsyrk(handle::cublasXtHandle_t, uplo::cublasFillMode_t, trans::cublasOperation_t, n::Csize_t, k::Csize_t, - alpha::RefOrCuRef{Cfloat}, A::PtrOrCuPtr{Cfloat}, - lda::Csize_t, beta::RefOrCuRef{Cfloat}, + alpha::Ref{Cfloat}, A::PtrOrCuPtr{Cfloat}, + lda::Csize_t, beta::Ref{Cfloat}, C::PtrOrCuPtr{Cfloat}, ldc::Csize_t)::cublasStatus_t end @@ -5554,9 +5554,9 @@ end initialize_context() @gcsafe_ccall libcublas.cublasXtDsyrk(handle::cublasXtHandle_t, uplo::cublasFillMode_t, trans::cublasOperation_t, n::Csize_t, k::Csize_t, - alpha::RefOrCuRef{Cdouble}, + alpha::Ref{Cdouble}, A::PtrOrCuPtr{Cdouble}, lda::Csize_t, - beta::RefOrCuRef{Cdouble}, C::PtrOrCuPtr{Cdouble}, + beta::Ref{Cdouble}, C::PtrOrCuPtr{Cdouble}, ldc::Csize_t)::cublasStatus_t end @@ -5564,9 +5564,9 @@ end initialize_context() @gcsafe_ccall libcublas.cublasXtCsyrk(handle::cublasXtHandle_t, uplo::cublasFillMode_t, trans::cublasOperation_t, n::Csize_t, k::Csize_t, - alpha::RefOrCuRef{cuComplex}, + alpha::Ref{cuComplex}, A::PtrOrCuPtr{cuComplex}, lda::Csize_t, - beta::RefOrCuRef{cuComplex}, + beta::Ref{cuComplex}, C::PtrOrCuPtr{cuComplex}, ldc::Csize_t)::cublasStatus_t end @@ -5575,9 +5575,9 @@ end initialize_context() @gcsafe_ccall libcublas.cublasXtZsyrk(handle::cublasXtHandle_t, uplo::cublasFillMode_t, trans::cublasOperation_t, n::Csize_t, k::Csize_t, - alpha::RefOrCuRef{cuDoubleComplex}, + alpha::Ref{cuDoubleComplex}, 
A::PtrOrCuPtr{cuDoubleComplex}, lda::Csize_t, - beta::RefOrCuRef{cuDoubleComplex}, + beta::Ref{cuDoubleComplex}, C::PtrOrCuPtr{cuDoubleComplex}, ldc::Csize_t)::cublasStatus_t end @@ -5586,9 +5586,9 @@ end initialize_context() @gcsafe_ccall libcublas.cublasXtCherk(handle::cublasXtHandle_t, uplo::cublasFillMode_t, trans::cublasOperation_t, n::Csize_t, k::Csize_t, - alpha::RefOrCuRef{Cfloat}, + alpha::Ref{Cfloat}, A::PtrOrCuPtr{cuComplex}, lda::Csize_t, - beta::RefOrCuRef{Cfloat}, + beta::Ref{Cfloat}, C::PtrOrCuPtr{cuComplex}, ldc::Csize_t)::cublasStatus_t end @@ -5597,9 +5597,9 @@ end initialize_context() @gcsafe_ccall libcublas.cublasXtZherk(handle::cublasXtHandle_t, uplo::cublasFillMode_t, trans::cublasOperation_t, n::Csize_t, k::Csize_t, - alpha::RefOrCuRef{Cdouble}, + alpha::Ref{Cdouble}, A::PtrOrCuPtr{cuDoubleComplex}, lda::Csize_t, - beta::RefOrCuRef{Cdouble}, + beta::Ref{Cdouble}, C::PtrOrCuPtr{cuDoubleComplex}, ldc::Csize_t)::cublasStatus_t end @@ -5609,9 +5609,9 @@ end initialize_context() @gcsafe_ccall libcublas.cublasXtSsyr2k(handle::cublasXtHandle_t, uplo::cublasFillMode_t, trans::cublasOperation_t, n::Csize_t, k::Csize_t, - alpha::RefOrCuRef{Cfloat}, A::PtrOrCuPtr{Cfloat}, + alpha::Ref{Cfloat}, A::PtrOrCuPtr{Cfloat}, lda::Csize_t, B::PtrOrCuPtr{Cfloat}, - ldb::Csize_t, beta::RefOrCuRef{Cfloat}, + ldb::Csize_t, beta::Ref{Cfloat}, C::PtrOrCuPtr{Cfloat}, ldc::Csize_t)::cublasStatus_t end @@ -5621,10 +5621,10 @@ end initialize_context() @gcsafe_ccall libcublas.cublasXtDsyr2k(handle::cublasXtHandle_t, uplo::cublasFillMode_t, trans::cublasOperation_t, n::Csize_t, k::Csize_t, - alpha::RefOrCuRef{Cdouble}, + alpha::Ref{Cdouble}, A::PtrOrCuPtr{Cdouble}, lda::Csize_t, B::PtrOrCuPtr{Cdouble}, ldb::Csize_t, - beta::RefOrCuRef{Cdouble}, + beta::Ref{Cdouble}, C::PtrOrCuPtr{Cdouble}, ldc::Csize_t)::cublasStatus_t end @@ -5634,10 +5634,10 @@ end initialize_context() @gcsafe_ccall libcublas.cublasXtCsyr2k(handle::cublasXtHandle_t, uplo::cublasFillMode_t, 
trans::cublasOperation_t, n::Csize_t, k::Csize_t, - alpha::RefOrCuRef{cuComplex}, + alpha::Ref{cuComplex}, A::PtrOrCuPtr{cuComplex}, lda::Csize_t, B::PtrOrCuPtr{cuComplex}, ldb::Csize_t, - beta::RefOrCuRef{cuComplex}, + beta::Ref{cuComplex}, C::PtrOrCuPtr{cuComplex}, ldc::Csize_t)::cublasStatus_t end @@ -5647,10 +5647,10 @@ end initialize_context() @gcsafe_ccall libcublas.cublasXtZsyr2k(handle::cublasXtHandle_t, uplo::cublasFillMode_t, trans::cublasOperation_t, n::Csize_t, k::Csize_t, - alpha::RefOrCuRef{cuDoubleComplex}, + alpha::Ref{cuDoubleComplex}, A::PtrOrCuPtr{cuDoubleComplex}, lda::Csize_t, B::PtrOrCuPtr{cuDoubleComplex}, ldb::Csize_t, - beta::RefOrCuRef{cuDoubleComplex}, + beta::Ref{cuDoubleComplex}, C::PtrOrCuPtr{cuDoubleComplex}, ldc::Csize_t)::cublasStatus_t end @@ -5660,10 +5660,10 @@ end initialize_context() @gcsafe_ccall libcublas.cublasXtCherkx(handle::cublasXtHandle_t, uplo::cublasFillMode_t, trans::cublasOperation_t, n::Csize_t, k::Csize_t, - alpha::RefOrCuRef{cuComplex}, + alpha::Ref{cuComplex}, A::PtrOrCuPtr{cuComplex}, lda::Csize_t, B::PtrOrCuPtr{cuComplex}, ldb::Csize_t, - beta::RefOrCuRef{Cfloat}, + beta::Ref{Cfloat}, C::PtrOrCuPtr{cuComplex}, ldc::Csize_t)::cublasStatus_t end @@ -5673,10 +5673,10 @@ end initialize_context() @gcsafe_ccall libcublas.cublasXtZherkx(handle::cublasXtHandle_t, uplo::cublasFillMode_t, trans::cublasOperation_t, n::Csize_t, k::Csize_t, - alpha::RefOrCuRef{cuDoubleComplex}, + alpha::Ref{cuDoubleComplex}, A::PtrOrCuPtr{cuDoubleComplex}, lda::Csize_t, B::PtrOrCuPtr{cuDoubleComplex}, ldb::Csize_t, - beta::RefOrCuRef{Cdouble}, + beta::Ref{Cdouble}, C::PtrOrCuPtr{cuDoubleComplex}, ldc::Csize_t)::cublasStatus_t end @@ -5687,7 +5687,7 @@ end @gcsafe_ccall libcublas.cublasXtStrsm(handle::cublasXtHandle_t, side::cublasSideMode_t, uplo::cublasFillMode_t, trans::cublasOperation_t, diag::cublasDiagType_t, m::Csize_t, n::Csize_t, - alpha::RefOrCuRef{Cfloat}, A::PtrOrCuPtr{Cfloat}, + alpha::Ref{Cfloat}, A::PtrOrCuPtr{Cfloat}, 
lda::Csize_t, B::PtrOrCuPtr{Cfloat}, ldb::Csize_t)::cublasStatus_t end @@ -5698,7 +5698,7 @@ end @gcsafe_ccall libcublas.cublasXtDtrsm(handle::cublasXtHandle_t, side::cublasSideMode_t, uplo::cublasFillMode_t, trans::cublasOperation_t, diag::cublasDiagType_t, m::Csize_t, n::Csize_t, - alpha::RefOrCuRef{Cdouble}, + alpha::Ref{Cdouble}, A::PtrOrCuPtr{Cdouble}, lda::Csize_t, B::PtrOrCuPtr{Cdouble}, ldb::Csize_t)::cublasStatus_t @@ -5710,7 +5710,7 @@ end @gcsafe_ccall libcublas.cublasXtCtrsm(handle::cublasXtHandle_t, side::cublasSideMode_t, uplo::cublasFillMode_t, trans::cublasOperation_t, diag::cublasDiagType_t, m::Csize_t, n::Csize_t, - alpha::RefOrCuRef{cuComplex}, + alpha::Ref{cuComplex}, A::PtrOrCuPtr{cuComplex}, lda::Csize_t, B::PtrOrCuPtr{cuComplex}, ldb::Csize_t)::cublasStatus_t @@ -5722,7 +5722,7 @@ end @gcsafe_ccall libcublas.cublasXtZtrsm(handle::cublasXtHandle_t, side::cublasSideMode_t, uplo::cublasFillMode_t, trans::cublasOperation_t, diag::cublasDiagType_t, m::Csize_t, n::Csize_t, - alpha::RefOrCuRef{cuDoubleComplex}, + alpha::Ref{cuDoubleComplex}, A::PtrOrCuPtr{cuDoubleComplex}, lda::Csize_t, B::PtrOrCuPtr{cuDoubleComplex}, ldb::Csize_t)::cublasStatus_t @@ -5733,9 +5733,9 @@ end initialize_context() @gcsafe_ccall libcublas.cublasXtSsymm(handle::cublasXtHandle_t, side::cublasSideMode_t, uplo::cublasFillMode_t, m::Csize_t, n::Csize_t, - alpha::RefOrCuRef{Cfloat}, A::PtrOrCuPtr{Cfloat}, + alpha::Ref{Cfloat}, A::PtrOrCuPtr{Cfloat}, lda::Csize_t, B::PtrOrCuPtr{Cfloat}, ldb::Csize_t, - beta::RefOrCuRef{Cfloat}, C::PtrOrCuPtr{Cfloat}, + beta::Ref{Cfloat}, C::PtrOrCuPtr{Cfloat}, ldc::Csize_t)::cublasStatus_t end @@ -5744,10 +5744,10 @@ end initialize_context() @gcsafe_ccall libcublas.cublasXtDsymm(handle::cublasXtHandle_t, side::cublasSideMode_t, uplo::cublasFillMode_t, m::Csize_t, n::Csize_t, - alpha::RefOrCuRef{Cdouble}, + alpha::Ref{Cdouble}, A::PtrOrCuPtr{Cdouble}, lda::Csize_t, B::PtrOrCuPtr{Cdouble}, ldb::Csize_t, - beta::RefOrCuRef{Cdouble}, 
C::PtrOrCuPtr{Cdouble}, + beta::Ref{Cdouble}, C::PtrOrCuPtr{Cdouble}, ldc::Csize_t)::cublasStatus_t end @@ -5756,10 +5756,10 @@ end initialize_context() @gcsafe_ccall libcublas.cublasXtCsymm(handle::cublasXtHandle_t, side::cublasSideMode_t, uplo::cublasFillMode_t, m::Csize_t, n::Csize_t, - alpha::RefOrCuRef{cuComplex}, + alpha::Ref{cuComplex}, A::PtrOrCuPtr{cuComplex}, lda::Csize_t, B::PtrOrCuPtr{cuComplex}, ldb::Csize_t, - beta::RefOrCuRef{cuComplex}, + beta::Ref{cuComplex}, C::PtrOrCuPtr{cuComplex}, ldc::Csize_t)::cublasStatus_t end @@ -5769,10 +5769,10 @@ end initialize_context() @gcsafe_ccall libcublas.cublasXtZsymm(handle::cublasXtHandle_t, side::cublasSideMode_t, uplo::cublasFillMode_t, m::Csize_t, n::Csize_t, - alpha::RefOrCuRef{cuDoubleComplex}, + alpha::Ref{cuDoubleComplex}, A::PtrOrCuPtr{cuDoubleComplex}, lda::Csize_t, B::PtrOrCuPtr{cuDoubleComplex}, ldb::Csize_t, - beta::RefOrCuRef{cuDoubleComplex}, + beta::Ref{cuDoubleComplex}, C::PtrOrCuPtr{cuDoubleComplex}, ldc::Csize_t)::cublasStatus_t end @@ -5782,10 +5782,10 @@ end initialize_context() @gcsafe_ccall libcublas.cublasXtChemm(handle::cublasXtHandle_t, side::cublasSideMode_t, uplo::cublasFillMode_t, m::Csize_t, n::Csize_t, - alpha::RefOrCuRef{cuComplex}, + alpha::Ref{cuComplex}, A::PtrOrCuPtr{cuComplex}, lda::Csize_t, B::PtrOrCuPtr{cuComplex}, ldb::Csize_t, - beta::RefOrCuRef{cuComplex}, + beta::Ref{cuComplex}, C::PtrOrCuPtr{cuComplex}, ldc::Csize_t)::cublasStatus_t end @@ -5795,10 +5795,10 @@ end initialize_context() @gcsafe_ccall libcublas.cublasXtZhemm(handle::cublasXtHandle_t, side::cublasSideMode_t, uplo::cublasFillMode_t, m::Csize_t, n::Csize_t, - alpha::RefOrCuRef{cuDoubleComplex}, + alpha::Ref{cuDoubleComplex}, A::PtrOrCuPtr{cuDoubleComplex}, lda::Csize_t, B::PtrOrCuPtr{cuDoubleComplex}, ldb::Csize_t, - beta::RefOrCuRef{cuDoubleComplex}, + beta::Ref{cuDoubleComplex}, C::PtrOrCuPtr{cuDoubleComplex}, ldc::Csize_t)::cublasStatus_t end @@ -5808,9 +5808,9 @@ end initialize_context() @gcsafe_ccall 
libcublas.cublasXtSsyrkx(handle::cublasXtHandle_t, uplo::cublasFillMode_t, trans::cublasOperation_t, n::Csize_t, k::Csize_t, - alpha::RefOrCuRef{Cfloat}, A::PtrOrCuPtr{Cfloat}, + alpha::Ref{Cfloat}, A::PtrOrCuPtr{Cfloat}, lda::Csize_t, B::PtrOrCuPtr{Cfloat}, - ldb::Csize_t, beta::RefOrCuRef{Cfloat}, + ldb::Csize_t, beta::Ref{Cfloat}, C::PtrOrCuPtr{Cfloat}, ldc::Csize_t)::cublasStatus_t end @@ -5820,10 +5820,10 @@ end initialize_context() @gcsafe_ccall libcublas.cublasXtDsyrkx(handle::cublasXtHandle_t, uplo::cublasFillMode_t, trans::cublasOperation_t, n::Csize_t, k::Csize_t, - alpha::RefOrCuRef{Cdouble}, + alpha::Ref{Cdouble}, A::PtrOrCuPtr{Cdouble}, lda::Csize_t, B::PtrOrCuPtr{Cdouble}, ldb::Csize_t, - beta::RefOrCuRef{Cdouble}, + beta::Ref{Cdouble}, C::PtrOrCuPtr{Cdouble}, ldc::Csize_t)::cublasStatus_t end @@ -5833,10 +5833,10 @@ end initialize_context() @gcsafe_ccall libcublas.cublasXtCsyrkx(handle::cublasXtHandle_t, uplo::cublasFillMode_t, trans::cublasOperation_t, n::Csize_t, k::Csize_t, - alpha::RefOrCuRef{cuComplex}, + alpha::Ref{cuComplex}, A::PtrOrCuPtr{cuComplex}, lda::Csize_t, B::PtrOrCuPtr{cuComplex}, ldb::Csize_t, - beta::RefOrCuRef{cuComplex}, + beta::Ref{cuComplex}, C::PtrOrCuPtr{cuComplex}, ldc::Csize_t)::cublasStatus_t end @@ -5846,10 +5846,10 @@ end initialize_context() @gcsafe_ccall libcublas.cublasXtZsyrkx(handle::cublasXtHandle_t, uplo::cublasFillMode_t, trans::cublasOperation_t, n::Csize_t, k::Csize_t, - alpha::RefOrCuRef{cuDoubleComplex}, + alpha::Ref{cuDoubleComplex}, A::PtrOrCuPtr{cuDoubleComplex}, lda::Csize_t, B::PtrOrCuPtr{cuDoubleComplex}, ldb::Csize_t, - beta::RefOrCuRef{cuDoubleComplex}, + beta::Ref{cuDoubleComplex}, C::PtrOrCuPtr{cuDoubleComplex}, ldc::Csize_t)::cublasStatus_t end @@ -5859,10 +5859,10 @@ end initialize_context() @gcsafe_ccall libcublas.cublasXtCher2k(handle::cublasXtHandle_t, uplo::cublasFillMode_t, trans::cublasOperation_t, n::Csize_t, k::Csize_t, - alpha::RefOrCuRef{cuComplex}, + alpha::Ref{cuComplex}, 
A::PtrOrCuPtr{cuComplex}, lda::Csize_t, B::PtrOrCuPtr{cuComplex}, ldb::Csize_t, - beta::RefOrCuRef{Cfloat}, + beta::Ref{Cfloat}, C::PtrOrCuPtr{cuComplex}, ldc::Csize_t)::cublasStatus_t end @@ -5872,10 +5872,10 @@ end initialize_context() @gcsafe_ccall libcublas.cublasXtZher2k(handle::cublasXtHandle_t, uplo::cublasFillMode_t, trans::cublasOperation_t, n::Csize_t, k::Csize_t, - alpha::RefOrCuRef{cuDoubleComplex}, + alpha::Ref{cuDoubleComplex}, A::PtrOrCuPtr{cuDoubleComplex}, lda::Csize_t, B::PtrOrCuPtr{cuDoubleComplex}, ldb::Csize_t, - beta::RefOrCuRef{Cdouble}, + beta::Ref{Cdouble}, C::PtrOrCuPtr{cuDoubleComplex}, ldc::Csize_t)::cublasStatus_t end @@ -5928,7 +5928,7 @@ end @gcsafe_ccall libcublas.cublasXtStrmm(handle::cublasXtHandle_t, side::cublasSideMode_t, uplo::cublasFillMode_t, trans::cublasOperation_t, diag::cublasDiagType_t, m::Csize_t, n::Csize_t, - alpha::RefOrCuRef{Cfloat}, A::PtrOrCuPtr{Cfloat}, + alpha::Ref{Cfloat}, A::PtrOrCuPtr{Cfloat}, lda::Csize_t, B::PtrOrCuPtr{Cfloat}, ldb::Csize_t, C::PtrOrCuPtr{Cfloat}, ldc::Csize_t)::cublasStatus_t @@ -5940,7 +5940,7 @@ end @gcsafe_ccall libcublas.cublasXtDtrmm(handle::cublasXtHandle_t, side::cublasSideMode_t, uplo::cublasFillMode_t, trans::cublasOperation_t, diag::cublasDiagType_t, m::Csize_t, n::Csize_t, - alpha::RefOrCuRef{Cdouble}, + alpha::Ref{Cdouble}, A::PtrOrCuPtr{Cdouble}, lda::Csize_t, B::PtrOrCuPtr{Cdouble}, ldb::Csize_t, C::PtrOrCuPtr{Cdouble}, @@ -5953,7 +5953,7 @@ end @gcsafe_ccall libcublas.cublasXtCtrmm(handle::cublasXtHandle_t, side::cublasSideMode_t, uplo::cublasFillMode_t, trans::cublasOperation_t, diag::cublasDiagType_t, m::Csize_t, n::Csize_t, - alpha::RefOrCuRef{cuComplex}, + alpha::Ref{cuComplex}, A::PtrOrCuPtr{cuComplex}, lda::Csize_t, B::PtrOrCuPtr{cuComplex}, ldb::Csize_t, C::PtrOrCuPtr{cuComplex}, @@ -5966,7 +5966,7 @@ end @gcsafe_ccall libcublas.cublasXtZtrmm(handle::cublasXtHandle_t, side::cublasSideMode_t, uplo::cublasFillMode_t, trans::cublasOperation_t, diag::cublasDiagType_t, 
m::Csize_t, n::Csize_t, - alpha::RefOrCuRef{cuDoubleComplex}, + alpha::Ref{cuDoubleComplex}, A::PtrOrCuPtr{cuDoubleComplex}, lda::Csize_t, B::PtrOrCuPtr{cuDoubleComplex}, ldb::Csize_t, C::PtrOrCuPtr{cuDoubleComplex}, @@ -5979,10 +5979,10 @@ end incx, beta, yarray, incy, batchCount) initialize_context() @ccall libcublas.cublasHSHgemvBatched(handle::cublasHandle_t, trans::cublasOperation_t, - m::Cint, n::Cint, alpha::RefOrCuRef{Cfloat}, + m::Cint, n::Cint, alpha::CuRef{Cfloat}, Aarray::CuPtr{Ptr{Float16}}, lda::Cint, xarray::CuPtr{Ptr{Float16}}, incx::Cint, - beta::RefOrCuRef{Cfloat}, + beta::CuRef{Cfloat}, yarray::CuPtr{Ptr{Float16}}, incy::Cint, batchCount::Cint)::cublasStatus_t end @@ -5991,10 +5991,10 @@ end incx, beta, yarray, incy, batchCount) initialize_context() @ccall libcublas.cublasHSSgemvBatched(handle::cublasHandle_t, trans::cublasOperation_t, - m::Cint, n::Cint, alpha::RefOrCuRef{Cfloat}, + m::Cint, n::Cint, alpha::CuRef{Cfloat}, Aarray::CuPtr{Ptr{Float16}}, lda::Cint, xarray::CuPtr{Ptr{Float16}}, incx::Cint, - beta::RefOrCuRef{Cfloat}, + beta::CuRef{Cfloat}, yarray::CuPtr{Ptr{Cfloat}}, incy::Cint, batchCount::Cint)::cublasStatus_t end @@ -6027,12 +6027,12 @@ end initialize_context() @ccall libcublas.cublasHSHgemvStridedBatched(handle::cublasHandle_t, trans::cublasOperation_t, m::Cint, n::Cint, - alpha::RefOrCuRef{Cfloat}, + alpha::CuRef{Cfloat}, A::CuPtr{Float16}, lda::Cint, strideA::Clonglong, x::CuPtr{Float16}, incx::Cint, stridex::Clonglong, - beta::RefOrCuRef{Cfloat}, + beta::CuRef{Cfloat}, y::CuPtr{Float16}, incy::Cint, stridey::Clonglong, batchCount::Cint)::cublasStatus_t @@ -6044,12 +6044,12 @@ end initialize_context() @ccall libcublas.cublasHSSgemvStridedBatched(handle::cublasHandle_t, trans::cublasOperation_t, m::Cint, n::Cint, - alpha::RefOrCuRef{Cfloat}, + alpha::CuRef{Cfloat}, A::CuPtr{Float16}, lda::Cint, strideA::Clonglong, x::CuPtr{Float16}, incx::Cint, stridex::Clonglong, - beta::RefOrCuRef{Cfloat}, + beta::CuRef{Cfloat}, 
y::CuPtr{Cfloat}, incy::Cint, stridey::Clonglong, batchCount::Cint)::cublasStatus_t @@ -6061,12 +6061,12 @@ end initialize_context() @ccall libcublas.cublasTSTgemvStridedBatched(handle::cublasHandle_t, trans::cublasOperation_t, m::Cint, n::Cint, - alpha::RefOrCuRef{Cfloat}, + alpha::CuRef{Cfloat}, A::CuPtr{BFloat16}, lda::Cint, strideA::Clonglong, x::CuPtr{BFloat16}, incx::Cint, stridex::Clonglong, - beta::RefOrCuRef{Cfloat}, + beta::CuRef{Cfloat}, y::CuPtr{BFloat16}, incy::Cint, stridey::Clonglong, batchCount::Cint)::cublasStatus_t @@ -6078,12 +6078,12 @@ end initialize_context() @ccall libcublas.cublasTSSgemvStridedBatched(handle::cublasHandle_t, trans::cublasOperation_t, m::Cint, n::Cint, - alpha::RefOrCuRef{Cfloat}, + alpha::CuRef{Cfloat}, A::CuPtr{BFloat16}, lda::Cint, strideA::Clonglong, x::CuPtr{BFloat16}, incx::Cint, stridex::Clonglong, - beta::RefOrCuRef{Cfloat}, + beta::CuRef{Cfloat}, y::CuPtr{Cfloat}, incy::Cint, stridey::Clonglong, batchCount::Cint)::cublasStatus_t @@ -6104,10 +6104,10 @@ end initialize_context() @ccall libcublas.cublasHgemmBatched(handle::cublasHandle_t, transa::cublasOperation_t, transb::cublasOperation_t, m::Cint, n::Cint, - k::Cint, alpha::RefOrCuRef{Float16}, + k::Cint, alpha::CuRef{Float16}, Aarray::CuPtr{Ptr{Float16}}, lda::Cint, Barray::CuPtr{Ptr{Float16}}, ldb::Cint, - beta::RefOrCuRef{Float16}, + beta::CuRef{Float16}, Carray::CuPtr{Ptr{Float16}}, ldc::Cint, batchCount::Cint)::cublasStatus_t end @@ -6119,11 +6119,11 @@ end @ccall libcublas.cublasHgemmStridedBatched(handle::cublasHandle_t, transa::cublasOperation_t, transb::cublasOperation_t, m::Cint, n::Cint, - k::Cint, alpha::RefOrCuRef{Float16}, + k::Cint, alpha::CuRef{Float16}, A::CuPtr{Float16}, lda::Cint, strideA::Clonglong, B::CuPtr{Float16}, ldb::Cint, strideB::Clonglong, - beta::RefOrCuRef{Float16}, C::CuPtr{Float16}, + beta::CuRef{Float16}, C::CuPtr{Float16}, ldc::Cint, strideC::Clonglong, batchCount::Cint)::cublasStatus_t end diff --git a/lib/cublas/wrappers.jl 
b/lib/cublas/wrappers.jl index db01c93f27..85299a0fd5 100644 --- a/lib/cublas/wrappers.jl +++ b/lib/cublas/wrappers.jl @@ -115,7 +115,7 @@ for (fname, fname_64, elty) in ((:cublasDscal_v2, :cublasDscal_v2_64, :Float64), (:cublasCscal_v2, :cublasCscal_v2_64, :ComplexF32)) @eval begin function scal!(n::Integer, - alpha::Number, + alpha, x::StridedCuVecOrDenseMat{$elty}) if CUBLAS.version() >= v"12.0" $fname_64(handle(), n, alpha, x, stride(x, 1)) @@ -126,18 +126,19 @@ for (fname, fname_64, elty) in ((:cublasDscal_v2, :cublasDscal_v2_64, :Float64), end end end +function scal!(n::Integer, alpha, x::StridedCuVecOrDenseMat{Float16}) + cublasScalEx(handle(), n, alpha, Float32, x, Float16, stride(x, 1), Float32) + return x +end function scal!(n::Integer, alpha::Number, x::StridedCuVecOrDenseMat{Float16}) - α = convert(Float32, alpha) - cublasScalEx(handle(), n, Ref{Float32}(α), Float32, x, Float16, stride(x, 1), Float32) + cublasScalEx(handle(), n, CuRef{Float32}(alpha), Float32, x, Float16, stride(x, 1), Float32) return x end # specific variants in case x is complex and alpha is real for (fname, fname_64, elty, celty) in ((:cublasCsscal_v2, :cublasCsscal_v2_64, :Float32, :ComplexF32), (:cublasZdscal_v2, :cublasZdscal_v2_64, :Float64, :ComplexF64)) @eval begin - function scal!(n::Integer, - alpha::$elty, - x::StridedCuVecOrDenseMat{$celty}) + function scal!(n::Integer, alpha::Union{Ref{$elty}, $elty, AbstractArray{$elty}}, x::StridedCuVecOrDenseMat{$celty}) if CUBLAS.version() >= v"12.0" $fname_64(handle(), n, alpha, x, stride(x, 1)) else @@ -147,7 +148,7 @@ for (fname, fname_64, elty, celty) in ((:cublasCsscal_v2, :cublasCsscal_v2_64, : end end end -function scal!(n::Integer, alpha::Number, x::StridedCuVecOrDenseMat{ComplexF16}) +function scal!(n::Integer, alpha, x::StridedCuVecOrDenseMat{ComplexF16}) wide_x = widen.(x) scal!(n, alpha, wide_x) thin_x = convert(typeof(x), wide_x) @@ -155,7 +156,6 @@ function scal!(n::Integer, alpha::Number, 
x::StridedCuVecOrDenseMat{ComplexF16}) return x end -## dot, dotc, dotu for (jname, fname, fname_64, elty) in ((:dot, :cublasDdot_v2, :cublasDdot_v2_64, :Float64), (:dot, :cublasSdot_v2, :cublasSdot_v2_64, :Float32), (:dotc, :cublasZdotc_v2, :cublasZdotc_v2_64, :ComplexF64), @@ -165,20 +165,55 @@ for (jname, fname, fname_64, elty) in ((:dot, :cublasDdot_v2, :cublasDdot_v2_64, @eval begin function $jname(n::Integer, x::StridedCuVecOrDenseMat{$elty}, - y::StridedCuVecOrDenseMat{$elty}) - result = Ref{$elty}() + y::StridedCuVecOrDenseMat{$elty}, + result) if CUBLAS.version() >= v"12.0" $fname_64(handle(), n, x, stride(x, 1), y, stride(y, 1), result) else $fname(handle(), n, x, stride(x, 1), y, stride(y, 1), result) end - return result[] + return result end end end -function dot(n::Integer, x::StridedCuVecOrDenseMat{Float16}, y::StridedCuVecOrDenseMat{Float16}) - result = Ref{Float16}() + +function dot( + n::Integer, + x::StridedCuVecOrDenseMat{T}, + y::StridedCuVecOrDenseMat{T}, + ) where {T <: Union{Float32, Float64}} + result = CuRef{T}() + dot(n, x, y, result) + return result[] +end + +function dotc( + n::Integer, + x::StridedCuVecOrDenseMat{T}, + y::StridedCuVecOrDenseMat{T}, + ) where {T <: Union{ComplexF32, ComplexF64}} + result = CuRef{T}() + dotc(n, x, y, result) + return result[] +end + +function dotu( + n::Integer, + x::StridedCuVecOrDenseMat{T}, + y::StridedCuVecOrDenseMat{T}, + ) where {T <: Union{ComplexF32, ComplexF64}} + result = CuRef{T}() + dotu(n, x, y, result) + return result[] +end + +function dot(n::Integer, x::StridedCuVecOrDenseMat{Float16}, y::StridedCuVecOrDenseMat{Float16}, result) cublasDotEx(handle(), n, x, Float16, stride(x, 1), y, Float16, stride(y, 1), result, Float16, Float32) + return result +end +function dot(n::Integer, x::StridedCuVecOrDenseMat{Float16}, y::StridedCuVecOrDenseMat{Float16}) + result = CuRef{Float16}() + dot(n, x, y, result) return result[] end function dotc(n::Integer, x::StridedCuVecOrDenseMat{ComplexF16}, 
y::StridedCuVecOrDenseMat{ComplexF16}) @@ -195,28 +230,44 @@ for (fname, fname_64, elty, ret_type) in ((:cublasDnrm2_v2, :cublasDnrm2_v2_64, (:cublasScnrm2_v2, :cublasScnrm2_v2_64, :ComplexF32, :Float32)) @eval begin function nrm2(n::Integer, - X::StridedCuVecOrDenseMat{$elty}) - result = Ref{$ret_type}() + X::StridedCuVecOrDenseMat{$elty}, + result, + ) if CUBLAS.version() >= v"12.0" $fname_64(handle(), n, X, stride(X, 1), result) else $fname(handle(), n, X, stride(X, 1), result) end + return result + end + function nrm2( + n::Integer, + X::StridedCuVecOrDenseMat{$elty} + ) + result = CuRef{$ret_type}() + nrm2(n, X, result) return result[] end end end + nrm2(x::StridedCuVecOrDenseMat) = nrm2(length(x), x) +nrm2(x::StridedCuVecOrDenseMat, result) = nrm2(length(x), x, result) -function nrm2(n::Integer, x::StridedCuVecOrDenseMat{Float16}) - result = Ref{Float16}() +function nrm2(n::Integer, x::StridedCuVecOrDenseMat{Float16}, result) cublasNrm2Ex(handle(), n, x, Float16, stride(x, 1), result, Float16, Float32) + return result +end +function nrm2(n::Integer, x::StridedCuVecOrDenseMat{Float16}) + result = CuRef{Float16}() + nrm2(n, x, result) return result[] end function nrm2(n::Integer, x::StridedCuVecOrDenseMat{ComplexF16}) wide_x = widen.(x) - nrm = nrm2(n, wide_x) - return convert(Float16, nrm) + wide_result = CuRef{Float32}() + nrm2(n, wide_x, wide_result) + return convert(Float16, wide_result[]) end ## asum @@ -226,13 +277,22 @@ for (fname, fname_64, elty, ret_type) in ((:cublasDasum_v2, :cublasDasum_v2_64, (:cublasScasum_v2, :cublasScasum_v2_64, :ComplexF32, :Float32)) @eval begin function asum(n::Integer, - x::StridedCuVecOrDenseMat{$elty}) - result = Ref{$ret_type}() + x::StridedCuVecOrDenseMat{$elty}, + result, + ) if CUBLAS.version() >= v"12.0" $fname_64(handle(), n, x, stride(x, 1), result) else $fname(handle(), n, x, stride(x, 1), result) end + return result + end + function asum( + n::Integer, + x::StridedCuVecOrDenseMat{$elty} + ) + result = 
CuRef{$ret_type}() + asum(n, x, result) return result[] end end @@ -245,7 +305,7 @@ for (fname, fname_64, elty) in ((:cublasDaxpy_v2, :cublasDaxpy_v2_64, :Float64), (:cublasCaxpy_v2, :cublasCaxpy_v2_64, :ComplexF32)) @eval begin function axpy!(n::Integer, - alpha::Number, + alpha, dx::StridedCuVecOrDenseMat{$elty}, dy::StridedCuVecOrDenseMat{$elty}) if CUBLAS.version() >= v"12.0" @@ -257,13 +317,18 @@ for (fname, fname_64, elty) in ((:cublasDaxpy_v2, :cublasDaxpy_v2_64, :Float64), end end end - -function axpy!(n::Integer, alpha::Number, dx::StridedCuVecOrDenseMat{Float16}, dy::StridedCuVecOrDenseMat{Float16}) - α = convert(Float32, alpha) - cublasAxpyEx(handle(), n, Ref{Float32}(α), Float32, dx, Float16, stride(dx, 1), dy, Float16, stride(dy, 1), Float32) +function axpy!( + n::Integer, + alpha, + dx::StridedCuVecOrDenseMat{Float16}, + dy::StridedCuVecOrDenseMat{Float16} + ) + # Float16 scalar inputs are not supported, so widen to Float32 + cublasAxpyEx(handle(), n, CuRef{Float32}(alpha), Float32, dx, Float16, stride(dx, 1), dy, Float16, stride(dy, 1), Float32) return dy end -function axpy!(n::Integer, alpha::Number, dx::StridedCuVecOrDenseMat{ComplexF16}, dy::StridedCuVecOrDenseMat{ComplexF16}) + +function axpy!(n::Integer, alpha, dx::StridedCuVecOrDenseMat{ComplexF16}, dy::StridedCuVecOrDenseMat{ComplexF16}) wide_x = widen.(dx) wide_y = widen.(dy) axpy!(n, alpha, wide_x, wide_y) @@ -273,18 +338,22 @@ function axpy!(n::Integer, alpha::Number, dx::StridedCuVecOrDenseMat{ComplexF16} end ## rot -for (fname, fname_64, elty, sty) in ((:cublasSrot_v2, :cublasSrot_v2_64, :Float32, :Number), - (:cublasDrot_v2, :cublasDrot_v2_64, :Float64, :Number), - (:cublasCrot_v2, :cublasCrot_v2_64, :ComplexF32, :Number), - (:cublasCsrot_v2, :cublasCsrot_v2_64, :ComplexF32, :Real), - (:cublasZrot_v2, :cublasZrot_v2_64, :ComplexF64, :Number), - (:cublasZdrot_v2, :cublasZdrot_v2_64, :ComplexF64, :Real)) +for (fname, fname_64, elty, sty) in ( + (:cublasSrot_v2, :cublasSrot_v2_64, :Float32, 
:Number), + (:cublasDrot_v2, :cublasDrot_v2_64, :Float64, :Number), + (:cublasCrot_v2, :cublasCrot_v2_64, :ComplexF32, :Number), + (:cublasCsrot_v2, :cublasCsrot_v2_64, :ComplexF32, :Real), + (:cublasZrot_v2, :cublasZrot_v2_64, :ComplexF64, :Number), + (:cublasZdrot_v2, :cublasZdrot_v2_64, :ComplexF64, :Real), + ) @eval begin function rot!(n::Integer, x::StridedCuVecOrDenseMat{$elty}, y::StridedCuVecOrDenseMat{$elty}, - c::Real, - s::$sty) + c::C, + s::S, + ) where {C<:Union{Ref{Real}, Real, AbstractArray{Real}}, + S<:Union{Ref{$sty}, $sty, AbstractArray{$sty}}} if CUBLAS.version() >= v"12.0" $fname_64(handle(), n, x, stride(x, 1), y, stride(y, 1), c, s) else @@ -303,10 +372,10 @@ for (fname, elty) in ((:cublasSrotg_v2, :Float32), ) @eval begin function rotg!(a::$elty, b::$elty) - c = Ref{real($elty)}(zero(real($elty))) - s = Ref{$elty}(zero($elty)) - ref_a = Ref(a) - ref_b = Ref(b) + c = CuRef{real($elty)}() + s = CuRef{$elty}() + ref_a = CuRef(a) + ref_b = CuRef(b) $fname(handle(), ref_a, ref_b, c, s) ref_a[], ref_b[], c[], s[] end @@ -321,7 +390,7 @@ for (fname, fname_64, elty) in ((:cublasSrotm_v2, :cublasSrotm_v2_64, :Float32), function rotm!(n::Integer, x::StridedCuVecOrDenseMat{$elty}, y::StridedCuVecOrDenseMat{$elty}, - param::AbstractVector{$elty}) + param::CuVector{$elty}) if CUBLAS.version() >= v"12.0" $fname_64(handle(), n, x, stride(x, 1), y, stride(y, 1), param) else @@ -340,13 +409,13 @@ for (fname, elty) in ((:cublasSrotmg_v2, :Float32), d2::$elty, x::$elty, y::$elty, - param::AbstractVector{$elty}) - ref_d1 = Ref(d1) - ref_d2 = Ref(d2) - ref_x = Ref(x) - ref_y = Ref(y) + param::CuVector{$elty}) + ref_d1 = CuRef(d1) + ref_d2 = CuRef(d2) + ref_x = CuRef(x) + ref_y = CuRef(y) $fname(handle(), ref_d1, ref_d2, ref_x, ref_y, param) - ref_d1[], ref_d2[], ref_x[], ref_y[], param + ref_d1[], ref_d2[], ref_x[], ref_y[], param end end end @@ -370,14 +439,16 @@ for (fname, fname_64, elty) in ((:cublasSswap_v2, :cublasSswap_v2_64, :Float32), end end -function 
axpby!(n::Integer, - alpha::Number, - dx::StridedCuVecOrDenseMat{T}, - beta::Number, - dy::StridedCuVecOrDenseMat{T}) where T <: Union{Float16, ComplexF16, CublasFloat} - scal!(n, beta, dy) - axpy!(n, alpha, dx, dy) - dy +function axpby!( + n::Integer, + alpha, + dx::StridedCuVecOrDenseMat{T}, + beta, + dy::StridedCuVecOrDenseMat{T} + ) where {T <: Union{Float16, ComplexF16, CublasFloat}} + scal!(n, beta, dy) + axpy!(n, alpha, dx, dy) + return dy end ## iamax @@ -388,19 +459,18 @@ for (fname, fname_64, elty) in ((:cublasIdamax_v2, :cublasIdamax_v2_64, :Float64 (:cublasIcamax_v2, :cublasIcamax_v2_64, :ComplexF32)) @eval begin function iamax(n::Integer, - dx::StridedCuVecOrDenseMat{$elty}) + dx::StridedCuVecOrDenseMat{$elty}, + result, + ) if CUBLAS.version() >= v"12.0" - result = Ref{Int64}() $fname_64(handle(), n, dx, stride(dx, 1), result) else - result = Ref{Cint}() $fname(handle(), n, dx, stride(dx, 1), result) end - return result[] + return result end end end -iamax(dx::StridedCuVecOrDenseMat) = iamax(length(dx), dx) ## iamin # iamin is not in standard blas is a CUBLAS extension @@ -410,19 +480,31 @@ for (fname, fname_64, elty) in ((:cublasIdamin_v2, :cublasIdamin_v2_64, :Float64 (:cublasIcamin_v2, :cublasIcamin_v2_64, :ComplexF32)) @eval begin function iamin(n::Integer, - dx::StridedCuVecOrDenseMat{$elty},) + dx::StridedCuVecOrDenseMat{$elty}, + result, + ) if CUBLAS.version() >= v"12.0" - result = Ref{Int64}() $fname_64(handle(), n, dx, stride(dx, 1), result) else - result = Ref{Cint}() $fname(handle(), n, dx, stride(dx, 1), result) end + return result + end + end +end + +for fname in (:iamax, :iamin) + @eval begin + function $fname(n::Integer, dx::StridedCuVecOrDenseMat) + result_type = CUBLAS.version() >= v"12.0" ? 
Int64 : Cint + result = CuRef{result_type}() + $fname(n, dx, result) return result[] end + $fname(dx::StridedCuVecOrDenseMat) = $fname(length(dx), dx) + $fname(dx::StridedCuVecOrDenseMat, result) = $fname(length(dx), dx, result) end end -iamin(dx::StridedCuVecOrDenseMat) = iamin(length(dx), dx) # Level 2 ## mv @@ -433,10 +515,10 @@ for (fname, fname_64, elty) in ((:cublasDgemv_v2, :cublasDgemv_v2_64, :Float64), (:cublasCgemv_v2, :cublasCgemv_v2_64, :ComplexF32)) @eval begin function gemv!(trans::Char, - alpha::Number, + alpha, A::StridedCuMatrix{$elty}, x::StridedCuVector{$elty}, - beta::Number, + beta, y::StridedCuVector{$elty}) # handle trans m,n = size(A) @@ -455,28 +537,29 @@ for (fname, fname_64, elty) in ((:cublasDgemv_v2, :cublasDgemv_v2_64, :Float64), end end end -function gemv(trans::Char, alpha::Number, - A::StridedCuMatrix{T}, x::StridedCuVector{T}) where T - gemv!(trans, alpha, A, x, zero(T), similar(x, size(A, (trans == 'N' ? 1 : 2)))) +function gemv(trans::Char, alpha, A::StridedCuMatrix{T}, x::StridedCuVector{T}) where {T} + return gemv!(trans, alpha, A, x, zero(T), similar(x, size(A, (trans == 'N' ? 1 : 2)))) end function gemv(trans::Char, A::StridedCuMatrix{T}, x::StridedCuVector{T}) where T gemv!(trans, one(T), A, x, zero(T), similar(x, T, size(A, (trans == 'N' ? 
1 : 2)))) end for (fname, fname_64, eltyin, eltyout) in ( - (:cublasDgemvBatched, :cublasDgemvBatched_64, :Float64, :Float64), - (:cublasSgemvBatched, :cublasSgemvBatched_64, :Float32, :Float32), - (:cublasHSHgemvBatched, :cublasHSHgemvBatched, :Float16, :Float16), - (:cublasHSSgemvBatched, :cublasHSSgemvBatched, :Float16, :Float32), - (:cublasZgemvBatched, :cublasZgemvBatched_64, :ComplexF64, :ComplexF64), - (:cublasCgemvBatched, :cublasCgemvBatched_64, :ComplexF32, :ComplexF32)) + (:cublasDgemvBatched, :cublasDgemvBatched_64, :Float64, :Float64), + (:cublasSgemvBatched, :cublasSgemvBatched_64, :Float32, :Float32), + (:cublasHSHgemvBatched, :cublasHSHgemvBatched, :Float16, :Float16), + (:cublasHSSgemvBatched, :cublasHSSgemvBatched, :Float16, :Float32), + (:cublasZgemvBatched, :cublasZgemvBatched_64, :ComplexF64, :ComplexF64), + (:cublasCgemvBatched, :cublasCgemvBatched_64, :ComplexF32, :ComplexF32), + ) @eval begin function gemv_batched!(trans::Char, - alpha::Number, - A::Vector{<:StridedCuMatrix{$eltyin}}, - x::Vector{<:StridedCuVector{$eltyin}}, - beta::Number, - y::Vector{<:StridedCuVector{$eltyout}}) + alpha, + A::Vector{<:StridedCuMatrix{$eltyin}}, + x::Vector{<:StridedCuVector{$eltyin}}, + beta, + y::Vector{<:StridedCuVector{$eltyout}} + ) if length(A) != length(x) || length(A) != length(y) throw(DimensionMismatch("Lengths of inputs must be the same")) end @@ -511,19 +594,21 @@ for (fname, fname_64, eltyin, eltyout) in ( end for (fname, fname_64, eltyin, eltyout) in ( - (:cublasDgemvStridedBatched, :cublasDgemvStridedBatched_64, :Float64, :Float64), - (:cublasSgemvStridedBatched, :cublasSgemvStridedBatched_64, :Float32, :Float32), - (:cublasHSHgemvStridedBatched, :cublasHSHgemvStridedBatched, :Float16, :Float16), - (:cublasHSSgemvStridedBatched, :cublasHSSgemvStridedBatched, :Float16, :Float32), - (:cublasZgemvStridedBatched, :cublasZgemvStridedBatched_64, :ComplexF64, :ComplexF64), - (:cublasCgemvStridedBatched, :cublasCgemvStridedBatched_64, :ComplexF32, 
:ComplexF32)) + (:cublasDgemvStridedBatched, :cublasDgemvStridedBatched_64, :Float64, :Float64), + (:cublasSgemvStridedBatched, :cublasSgemvStridedBatched_64, :Float32, :Float32), + (:cublasHSHgemvStridedBatched, :cublasHSHgemvStridedBatched, :Float16, :Float16), + (:cublasHSSgemvStridedBatched, :cublasHSSgemvStridedBatched, :Float16, :Float32), + (:cublasZgemvStridedBatched, :cublasZgemvStridedBatched_64, :ComplexF64, :ComplexF64), + (:cublasCgemvStridedBatched, :cublasCgemvStridedBatched_64, :ComplexF32, :ComplexF32), + ) @eval begin function gemv_strided_batched!(trans::Char, - alpha::Number, - A::AbstractArray{$eltyin, 3}, - x::AbstractArray{$eltyin, 2}, - beta::Number, - y::AbstractArray{$eltyout, 2}) + alpha, + A::AbstractArray{$eltyin, 3}, + x::AbstractArray{$eltyin, 2}, + beta, + y::AbstractArray{$eltyout, 2} + ) if size(A, 3) != size(x, 2) || size(A, 3) != size(y, 2) throw(DimensionMismatch("Batch sizes must be equal for all inputs")) end @@ -560,10 +645,10 @@ for (fname, fname_64, elty) in ((:cublasDgbmv_v2, :cublasDgbmv_v2_64, :Float64), m::Integer, kl::Integer, ku::Integer, - alpha::Number, + alpha, A::StridedCuMatrix{$elty}, x::StridedCuVector{$elty}, - beta::Number, + beta, y::StridedCuVector{$elty}) n = size(A,2) # check dimensions @@ -581,12 +666,12 @@ for (fname, fname_64, elty) in ((:cublasDgbmv_v2, :cublasDgbmv_v2_64, :Float64), end end end -function gbmv(trans::Char, m::Integer, kl::Integer, ku::Integer, alpha::Number, - A::StridedCuMatrix{T}, x::StridedCuVector{T}) where T +function gbmv(trans::Char, m::Integer, kl::Integer, ku::Integer, alpha, + A::StridedCuMatrix{T}, x::StridedCuVector{T}) where {T} # TODO: fix gbmv bug in julia - n = size(A,2) + n = size(A, 2) leny = trans == 'N' ? 
m : n - gbmv!(trans, m, kl, ku, alpha, A, x, zero(T), similar(x, leny)) + return gbmv!(trans, m, kl, ku, alpha, A, x, zero(T), similar(x, leny)) end function gbmv(trans::Char, m::Integer, kl::Integer, ku::Integer, A::StridedCuMatrix{T}, x::StridedCuVector{T}) where T @@ -598,10 +683,10 @@ for (fname, fname_64, elty) in ((:cublasDspmv_v2, :cublasDspmv_v2_64, :Float64), (:cublasSspmv_v2, :cublasSspmv_v2_64, :Float32)) @eval begin function spmv!(uplo::Char, - alpha::Number, + alpha, AP::StridedCuVector{$elty}, x::StridedCuVector{$elty}, - beta::Number, + beta, y::StridedCuVector{$elty}) n = round(Int, (sqrt(8*length(AP))-1)/2) if n != length(x) || n != length(y) throw(DimensionMismatch("")) end @@ -616,9 +701,8 @@ for (fname, fname_64, elty) in ((:cublasDspmv_v2, :cublasDspmv_v2_64, :Float64), end end end -function spmv(uplo::Char, alpha::Number, - AP::StridedCuVector{T}, x::StridedCuVector{T}) where T - spmv!(uplo, alpha, AP, x, zero(T), similar(x)) +function spmv(uplo::Char, alpha, AP::StridedCuVector{T}, x::StridedCuVector{T}) where {T} + return spmv!(uplo, alpha, AP, x, zero(T), similar(x)) end function spmv(uplo::Char, AP::StridedCuVector{T}, x::StridedCuVector{T}) where T spmv(uplo, one(T), AP, x) @@ -632,10 +716,10 @@ for (fname, fname_64, elty) in ((:cublasDsymv_v2, :cublasDsymv_v2_64, :Float64), # Note that the complex symv are not BLAS but auiliary functions in LAPACK @eval begin function symv!(uplo::Char, - alpha::Number, + alpha, A::StridedCuMatrix{$elty}, x::StridedCuVector{$elty}, - beta::Number, + beta, y::StridedCuVector{$elty}) m, n = size(A) if m != n throw(DimensionMismatch("Matrix A is $m by $n but must be square")) end @@ -652,9 +736,8 @@ for (fname, fname_64, elty) in ((:cublasDsymv_v2, :cublasDsymv_v2_64, :Float64), end end end -function symv(uplo::Char, alpha::Number, - A::StridedCuMatrix{T}, x::StridedCuVector{T}) where T - symv!(uplo, alpha, A, x, zero(T), similar(x)) +function symv(uplo::Char, alpha, A::StridedCuMatrix{T}, 
x::StridedCuVector{T}) where {T} + return symv!(uplo, alpha, A, x, zero(T), similar(x)) end function symv(uplo::Char, A::StridedCuMatrix{T}, x::StridedCuVector{T}) where T symv(uplo, one(T), A, x) @@ -666,10 +749,10 @@ for (fname, fname_64, elty) in ((:cublasZhemv_v2, :cublasZhemv_v2_64, :ComplexF6 (:cublasChemv_v2, :cublasChemv_v2_64, :ComplexF32)) @eval begin function hemv!(uplo::Char, - alpha::Number, + alpha, A::StridedCuMatrix{$elty}, x::StridedCuVector{$elty}, - beta::Number, + beta, y::StridedCuVector{$elty}) # TODO: fix dimension check bug in julia m, n = size(A) @@ -687,9 +770,11 @@ for (fname, fname_64, elty) in ((:cublasZhemv_v2, :cublasZhemv_v2_64, :ComplexF6 end end end -function hemv(uplo::Char, alpha::Number, A::StridedCuMatrix{T}, - x::StridedCuVector{T}) where T - hemv!(uplo, alpha, A, x, zero(T), similar(x)) +function hemv( + uplo::Char, alpha, + A::StridedCuMatrix{T}, x::StridedCuVector{T} + ) where {T} + return hemv!(uplo, alpha, A, x, zero(T), similar(x)) end function hemv(uplo::Char, A::StridedCuMatrix{T}, x::StridedCuVector{T}) where T @@ -704,10 +789,10 @@ for (fname, fname_64, elty) in ((:cublasDsbmv_v2, :cublasDsbmv_v2_64, :Float64), @eval begin function sbmv!(uplo::Char, k::Integer, - alpha::Number, + alpha, A::StridedCuMatrix{$elty}, x::StridedCuVector{$elty}, - beta::Number, + beta, y::StridedCuVector{$elty}) m, n = size(A) #if m != n throw(DimensionMismatch("Matrix A is $m by $n but must be square")) end @@ -726,10 +811,11 @@ for (fname, fname_64, elty) in ((:cublasDsbmv_v2, :cublasDsbmv_v2_64, :Float64), end end end -function sbmv(uplo::Char, k::Integer, alpha::Number, - A::StridedCuMatrix{T}, x::StridedCuVector{T}) where T - n = size(A,2) - sbmv!(uplo, k, alpha, A, x, zero(T), similar(x, n)) +function sbmv( + uplo::Char, k::Integer, alpha, + A::StridedCuMatrix{T}, x::StridedCuVector{T} + ) where {T} + return sbmv!(uplo, k, alpha, A, x, zero(T), similar(x)) end function sbmv(uplo::Char, k::Integer, A::StridedCuMatrix{T}, 
x::StridedCuVector{T}) where T @@ -742,10 +828,10 @@ for (fname, fname_64, elty) in ((:cublasZhbmv_v2, :cublasZhbmv_v2_64, :ComplexF6 @eval begin function hbmv!(uplo::Char, k::Integer, - alpha::Number, + alpha, A::StridedCuMatrix{$elty}, x::StridedCuVector{$elty}, - beta::Number, + beta, y::StridedCuVector{$elty}) m, n = size(A) if !(1<=(1+k)<=n) throw(DimensionMismatch("Incorrect number of bands")) end @@ -763,10 +849,11 @@ for (fname, fname_64, elty) in ((:cublasZhbmv_v2, :cublasZhbmv_v2_64, :ComplexF6 end end end -function hbmv(uplo::Char, k::Integer, alpha::Number, - A::StridedCuMatrix{T}, x::StridedCuVector{T}) where T - n = size(A,2) - hbmv!(uplo, k, alpha, A, x, zero(T), similar(x, n)) +function hbmv( + uplo::Char, k::Integer, alpha, + A::StridedCuMatrix{T}, x::StridedCuVector{T} + ) where {T} + return hbmv!(uplo, k, alpha, A, x, zero(T), similar(x)) end function hbmv(uplo::Char, k::Integer, A::StridedCuMatrix{T}, x::StridedCuVector{T}) where T @@ -907,7 +994,8 @@ for (fname, fname_64, elty) in ((:cublasDger_v2, :cublasDger_v2_64, :Float64), (:cublasZgerc_v2, :cublasZgerc_v2_64, :ComplexF64), (:cublasCgerc_v2, :cublasCgerc_v2_64, :ComplexF32)) @eval begin - function ger!(alpha::Number, + function ger!( + alpha, x::StridedCuVector{$elty}, y::StridedCuVector{$elty}, A::StridedCuMatrix{$elty}) @@ -932,7 +1020,7 @@ for (fname, fname_64, elty) in ((:cublasDspr_v2, :cublasDspr_v2_64, :Float64), (:cublasSspr_v2, :cublasSspr_v2_64, :Float32)) @eval begin function spr!(uplo::Char, - alpha::Number, + alpha, x::StridedCuVector{$elty}, AP::StridedCuVector{$elty}) n = round(Int, (sqrt(8*length(AP))-1)/2) @@ -956,7 +1044,7 @@ for (fname, fname_64, elty) in ((:cublasDsyr_v2, :cublasDsyr_v2_64, :Float64), (:cublasCsyr_v2, :cublasCsyr_v2_64, :ComplexF32)) @eval begin function syr!(uplo::Char, - alpha::Number, + alpha, x::StridedCuVector{$elty}, A::StridedCuMatrix{$elty}) m, n = size(A) @@ -975,11 +1063,13 @@ for (fname, fname_64, elty) in ((:cublasDsyr_v2, :cublasDsyr_v2_64, 
:Float64), end ### her -for (fname, fname_64, elty) in ((:cublasZher_v2, :cublasZher_v2_64, :ComplexF64), - (:cublasCher_v2, :cublasCher_v2_64, :ComplexF32)) +for (fname, fname_64, elty, relty) in ( + (:cublasZher_v2, :cublasZher_v2_64, :ComplexF64, :Float64), + (:cublasCher_v2, :cublasCher_v2_64, :ComplexF32, :Float32), + ) @eval begin function her!(uplo::Char, - alpha::Number, + alpha, x::StridedCuVector{$elty}, A::StridedCuMatrix{$elty}) m, n = size(A) @@ -1002,10 +1092,11 @@ for (fname, fname_64, elty) in ((:cublasZher2_v2, :cublasZher2_v2_64, :ComplexF6 (:cublasCher2_v2, :cublasCher2_v2_64, :ComplexF32)) @eval begin function her2!(uplo::Char, - alpha::Number, - x::StridedCuVector{$elty}, - y::StridedCuVector{$elty}, - A::StridedCuMatrix{$elty}) + alpha, + x::StridedCuVector{$elty}, + y::StridedCuVector{$elty}, + A::StridedCuMatrix{$elty} + ) m, n = size(A) m == n || throw(DimensionMismatch("Matrix A is $m by $n but must be square")) length(x) == n || throw(DimensionMismatch("Length of vector must be the same as the matrix dimensions")) @@ -1033,10 +1124,10 @@ for (fname, fname_64, elty) in ((:cublasDgemm_v2, :cublasDgemm_v2_64, :Float64), @eval begin function gemm!(transA::Char, transB::Char, - alpha::Number, + alpha, A::StridedCuVecOrMat{$elty}, B::StridedCuVecOrMat{$elty}, - beta::Number, + beta, C::StridedCuVecOrMat{$elty}) m = size(A, transA == 'N' ? 1 : 2) k = size(A, transA == 'N' ? 2 : 1) @@ -1056,11 +1147,19 @@ for (fname, fname_64, elty) in ((:cublasDgemm_v2, :cublasDgemm_v2_64, :Float64), end end end -function gemm(transA::Char, transB::Char, alpha::Number, - A::StridedCuVecOrMat{T}, B::StridedCuVecOrMat{T}) where T - gemm!(transA, transB, alpha, A, B, zero(T), - similar(B, (size(A, transA == 'N' ? 1 : 2), - size(B, transB == 'N' ? 
2 : 1)))) +function gemm( + transA::Char, transB::Char, alpha, + A::StridedCuVecOrMat{T}, B::StridedCuVecOrMat{T} + ) where {T} + return gemm!( + transA, transB, alpha, A, B, zero(T), + similar( + B, ( + size(A, transA == 'N' ? 1 : 2), + size(B, transB == 'N' ? 2 : 1), + ) + ) + ) end function gemm(transA::Char, transB::Char, A::StridedCuVecOrMat{T}, B::StridedCuVecOrMat{T}) where T @@ -1145,10 +1244,10 @@ function gemmExComputeType(TA, TB, TC, m, k, n) end function gemmEx!(transA::Char, transB::Char, - @nospecialize(alpha::Number), + @nospecialize(alpha), @nospecialize(A::StridedCuVecOrMat), @nospecialize(B::StridedCuVecOrMat), - @nospecialize(beta::Number), + @nospecialize(beta), @nospecialize(C::StridedCuVecOrMat); algo::cublasGemmAlgo_t=CUBLAS_GEMM_DEFAULT) m = size(A, transA == 'N' ? 1 : 2) @@ -1166,22 +1265,26 @@ function gemmEx!(transA::Char, transB::Char, computeT = juliaStorageType(eltype(C), computeType) if version() >= v"11.0" # with CUDA 11, the compute type encodes the math mode. - cublasGemmEx(handle(), transA, transB, m, n, k, Ref{computeT}(alpha), A, eltype(A), lda, B, - eltype(B), ldb, Ref{computeT}(beta), C, eltype(C), ldc, computeType, algo) + cublasGemmEx( + handle(), transA, transB, m, n, k, CuRef{computeT}(alpha), A, eltype(A), lda, B, + eltype(B), ldb, CuRef{computeT}(beta), C, eltype(C), ldc, computeType, algo + ) else # before CUDA 11, it was a plain cudaDataType. 
computeType = convert(cudaDataType, computeT) - cublasGemmEx_old(handle(), transA, transB, m, n, k, Ref{computeT}(alpha), A, eltype(A), lda, B, - eltype(B), ldb, Ref{computeT}(beta), C, eltype(C), ldc, computeType, algo) + cublasGemmEx_old( + handle(), transA, transB, m, n, k, CuRef{computeT}(alpha), A, eltype(A), lda, B, + eltype(B), ldb, CuRef{computeT}(beta), C, eltype(C), ldc, computeType, algo + ) end C end function gemmBatchedEx!(transA::Char, transB::Char, - @nospecialize(alpha::Number), + @nospecialize(alpha), @nospecialize(A::Vector{<:StridedCuVecOrMat}), @nospecialize(B::Vector{<:StridedCuVecOrMat}), - @nospecialize(beta::Number), + @nospecialize(beta), @nospecialize(C::Vector{<:StridedCuVecOrMat}); algo::cublasGemmAlgo_t=CUBLAS_GEMM_DEFAULT) if length(A) != length(B) || length(A) != length(C) @@ -1210,8 +1313,10 @@ function gemmBatchedEx!(transA::Char, transB::Char, Cptrs = unsafe_batch(C) if version() >= v"11.0" # with CUDA 11, the compute type encodes the math mode. - cublasGemmBatchedEx(handle(), transA, transB, m, n, k, Ref{computeT}(alpha), Aptrs, eltype(A[1]), lda, Bptrs, - eltype(B[1]), ldb, Ref{computeT}(beta), Cptrs, eltype(C[1]), ldc, length(A), computeType, algo) + cublasGemmBatchedEx( + handle(), transA, transB, m, n, k, CuRef{computeT}(alpha), Aptrs, eltype(A[1]), lda, Bptrs, + eltype(B[1]), ldb, CuRef{computeT}(beta), Cptrs, eltype(C[1]), ldc, length(A), computeType, algo + ) else error("Not implemented for CUDA 11 and below.") end @@ -1222,11 +1327,12 @@ function gemmBatchedEx!(transA::Char, transB::Char, C end -function gemmStridedBatchedEx!(transA::Char, transB::Char, - @nospecialize(alpha::Number), +function gemmStridedBatchedEx!( + transA::Char, transB::Char, + @nospecialize(alpha), @nospecialize(A::AbstractArray{Ta, 3}), @nospecialize(B::AbstractArray{Tb, 3}), - @nospecialize(beta::Number), + @nospecialize(beta), @nospecialize(C::AbstractArray{Tc, 3}); algo::cublasGemmAlgo_t=CUBLAS_GEMM_DEFAULT) where {Ta, Tb, Tc} if size(A, 3) != 
size(B, 3) || size(A, 3) != size(C, 3) @@ -1253,8 +1359,9 @@ function gemmStridedBatchedEx!(transA::Char, transB::Char, computeT = juliaStorageType(eltype(C), computeType) if version() >= v"11.0" # with CUDA 11, the compute type encodes the math mode. - cublasGemmStridedBatchedEx(handle(), transA, transB, m, n, k, Ref{computeT}(alpha), A, eltype(A), lda, strideA, - B, eltype(B), ldb, strideB, Ref{computeT}(beta), C, eltype(C), ldc, strideC, + cublasGemmStridedBatchedEx( + handle(), transA, transB, m, n, k, CuRef{computeT}(alpha), A, eltype(A), lda, strideA, + B, eltype(B), ldb, strideB, CuRef{computeT}(beta), C, eltype(C), ldc, strideC, batchCount, computeType, algo) else error("Not implemented for CUDA 11 and below.") @@ -1337,12 +1444,19 @@ for (fname, fname_64, elty) in ((:cublasSgemmGroupedBatched, :cublasSgemmGrouped Bptrs = unsafe_batch(reduce(vcat, B)) Cptrs = unsafe_batch(reduce(vcat, C)) - if CUBLAS.version() >= v"12.0" - $fname_64(handle(), transa, transb, m, n, k, alpha, Aptrs, lda, - Bptrs, ldb, beta, Cptrs, ldc, group_count, group_size) - else - $fname(handle(), transa, transb, m, n, k, alpha, Aptrs, lda, - Bptrs, ldb, beta, Cptrs, ldc, group_count, group_size) + try + ## XXX: cublasXgemmGroupedBatched does not seem to support device pointers + cublasSetPointerMode_v2(handle(), CUBLAS_POINTER_MODE_HOST) + + if CUBLAS.version() >= v"12.0" + $fname_64(handle(), transa, transb, m, n, k, alpha, Aptrs, lda, + Bptrs, ldb, beta, Cptrs, ldc, group_count, group_size) + else + $fname(handle(), transa, transb, m, n, k, alpha, Aptrs, lda, + Bptrs, ldb, beta, Cptrs, ldc, group_count, group_size) + end + finally + cublasSetPointerMode_v2(handle(), CUBLAS_POINTER_MODE_DEVICE) end unsafe_free!(Cptrs) unsafe_free!(Bptrs) @@ -1389,12 +1503,19 @@ for (fname, fname_64, elty) in ((:cublasSgemmGroupedBatched, :cublasSgemmGrouped Bptrs = unsafe_batch(B) Cptrs = unsafe_batch(C) - if CUBLAS.version() >= v"12.0" - $fname_64(handle(), transa, transb, m, n, k, alpha, Aptrs, lda, 
- Bptrs, ldb, beta, Cptrs, ldc, group_count, group_size) - else - $fname(handle(), transa, transb, m, n, k, alpha, Aptrs, lda, - Bptrs, ldb, beta, Cptrs, ldc, group_count, group_size) + try + ## XXX: cublasXgemmGroupedBatched does not seem to support device pointers + cublasSetPointerMode_v2(handle(), CUBLAS_POINTER_MODE_HOST) + + if CUBLAS.version() >= v"12.0" + $fname_64(handle(), transa, transb, m, n, k, alpha, Aptrs, lda, + Bptrs, ldb, beta, Cptrs, ldc, group_count, group_size) + else + $fname(handle(), transa, transb, m, n, k, alpha, Aptrs, lda, + Bptrs, ldb, beta, Cptrs, ldc, group_count, group_size) + end + finally + cublasSetPointerMode_v2(handle(), CUBLAS_POINTER_MODE_DEVICE) end unsafe_free!(Cptrs) unsafe_free!(Bptrs) @@ -1433,6 +1554,7 @@ alpha = [one(T) for i = 1:length(transA)] gemm_grouped_batched(transA, transB, alpha, A, B) end + ## (GE) general matrix-matrix multiplication batched for (fname, fname_64, elty) in ((:cublasDgemmBatched, :cublasDgemmBatched_64, :Float64), (:cublasSgemmBatched, :cublasSgemmBatched_64, :Float32), @@ -1442,10 +1564,10 @@ for (fname, fname_64, elty) in ((:cublasDgemmBatched, :cublasDgemmBatched_64, :F @eval begin function gemm_batched!(transA::Char, transB::Char, - alpha::Number, + alpha, A::Vector{<:StridedCuMatrix{$elty}}, B::Vector{<:StridedCuMatrix{$elty}}, - beta::Number, + beta, C::Vector{<:StridedCuMatrix{$elty}}) if length(A) != length(B) || length(A) != length(C) throw(DimensionMismatch("")) @@ -1484,14 +1606,20 @@ for (fname, fname_64, elty) in ((:cublasDgemmBatched, :cublasDgemmBatched_64, :F end end -function gemm_batched(transA::Char, transB::Char, alpha::Number, - A::Vector{<:StridedCuMatrix{T}}, B::Vector{<:StridedCuMatrix{T}}) where T - C = CuMatrix{T}[similar(B[1], (size(A[1], transA == 'N' ? 1 : 2),size(B[1], transB == 'N' ? 
2 : 1))) for i in 1:length(A)] - gemm_batched!(transA, transB, alpha, A, B, zero(T), C ) +function gemm_batched(transA::Char, + transB::Char, + alpha, + A::Vector{<:StridedCuMatrix{T}}, + B::Vector{<:StridedCuMatrix{T}}) where {T} + C = CuMatrix{T}[similar(B[1], (size(A[1], transA == 'N' ? 1 : 2), size(B[1], transB == 'N' ? 2 : 1))) for i in 1:length(A)] + return gemm_batched!(transA, transB, alpha, A, B, zero(T), C) end -function gemm_batched(transA::Char, transB::Char, - A::Vector{<:StridedCuMatrix{T}}, B::Vector{<:StridedCuMatrix{T}}) where T - gemm_batched(transA, transB, one(T), A, B) +function gemm_batched(transA::Char, + transB::Char, + A::Vector{<:StridedCuMatrix{T}}, + B::Vector{<:StridedCuMatrix{T}}) where {T} + C = CuMatrix{T}[similar(B[1], (size(A[1], transA == 'N' ? 1 : 2), size(B[1], transB == 'N' ? 2 : 1))) for i in 1:length(A)] + return gemm_batched!(transA, transB, one(T), A, B, zero(T), C) end ## (GE) general matrix-matrix multiplication strided batched @@ -1503,10 +1631,10 @@ for (fname, fname_64, elty) in ((:cublasDgemmStridedBatched, :cublasDgemmStrided @eval begin function gemm_strided_batched!(transA::Char, transB::Char, - alpha::Number, + alpha, A::AbstractArray{$elty, 3}, # allow PermutedDimsArray B::AbstractArray{$elty, 3}, - beta::Number, + beta, C::AbstractArray{$elty, 3}) m = size(A, transA == 'N' ? 1 : 2) k = size(A, transA == 'N' ? 2 : 1) @@ -1537,12 +1665,18 @@ for (fname, fname_64, elty) in ((:cublasDgemmStridedBatched, :cublasDgemmStrided end end end -function gemm_strided_batched(transA::Char, transB::Char, alpha::Number, - A::AbstractArray{T, 3}, B::AbstractArray{T, 3}) where T - C = similar(B, (size(A, transA == 'N' ? 1 : 2), - size(B, transB == 'N' ? 
2 : 1), - max(size(A, 3), size(B, 3)))) - gemm_strided_batched!(transA, transB, alpha, A, B, zero(T), C ) +function gemm_strided_batched( + transA::Char, transB::Char, alpha, + A::AbstractArray{T, 3}, B::AbstractArray{T, 3} + ) where {T} + C = similar( + B, ( + size(A, transA == 'N' ? 1 : 2), + size(B, transB == 'N' ? 2 : 1), + max(size(A, 3), size(B, 3)), + ) + ) + return gemm_strided_batched!(transA, transB, alpha, A, B, zero(T), C) end function gemm_strided_batched(transA::Char, transB::Char, A::AbstractArray{T, 3}, B::AbstractArray{T, 3}) where T @@ -1558,10 +1692,10 @@ for (fname, fname_64, elty) in ((:cublasDsymm_v2, :cublasDsymm_v2_64, :Float64), @eval begin function symm!(side::Char, uplo::Char, - alpha::Number, + alpha, A::StridedCuMatrix{$elty}, B::StridedCuMatrix{$elty}, - beta::Number, + beta, C::StridedCuMatrix{$elty}) k, nA = size(A) if k != nA throw(DimensionMismatch("Matrix A must be square")) end @@ -1582,9 +1716,11 @@ for (fname, fname_64, elty) in ((:cublasDsymm_v2, :cublasDsymm_v2_64, :Float64), end end end -function symm(side::Char, uplo::Char, alpha::Number, - A::StridedCuMatrix{T}, B::StridedCuMatrix{T}) where T - symm!(side, uplo, alpha, A, B, zero(T), similar(B)) +function symm( + side::Char, uplo::Char, alpha, + A::StridedCuMatrix{T}, B::StridedCuMatrix{T} + ) where {T} + return symm!(side, uplo, alpha, A, B, zero(T), similar(B)) end function symm(side::Char, uplo::Char, A::StridedCuMatrix{T}, B::StridedCuMatrix{T}) where T @@ -1599,9 +1735,9 @@ for (fname, fname_64, elty) in ((:cublasDsyrk_v2, :cublasDsyrk_v2_64, :Float64), @eval begin function syrk!(uplo::Char, trans::Char, - alpha::Number, + alpha, A::StridedCuVecOrMat{$elty}, - beta::Number, + beta, C::StridedCuMatrix{$elty}) mC, n = size(C) if mC != n throw(DimensionMismatch("C must be square")) end @@ -1619,7 +1755,7 @@ for (fname, fname_64, elty) in ((:cublasDsyrk_v2, :cublasDsyrk_v2_64, :Float64), end end end -function syrk(uplo::Char, trans::Char, alpha::Number, 
A::StridedCuVecOrMat{T}) where T +function syrk(uplo::Char, trans::Char, alpha, A::StridedCuVecOrMat{T}) where T n = size(A, trans == 'N' ? 1 : 2) syrk!(uplo, trans, alpha, A, zero(T), similar(A, (n, n))) end @@ -1634,10 +1770,10 @@ for (fname, fname_64, elty) in ((:cublasDsyrkx, :cublasDsyrkx_64, :Float64), @eval begin function syrkx!(uplo::Char, trans::Char, - alpha::Number, + alpha, A::StridedCuVecOrMat{$elty}, B::StridedCuVecOrMat{$elty}, - beta::Number, + beta, C::StridedCuMatrix{$elty}) mC, n = size(C) if mC != n throw(DimensionMismatch("C must be square")) end @@ -1656,7 +1792,7 @@ for (fname, fname_64, elty) in ((:cublasDsyrkx, :cublasDsyrkx_64, :Float64), end end end -function syrkx(uplo::Char, trans::Char, alpha::Number, A::StridedCuVecOrMat{T}, +function syrkx(uplo::Char, trans::Char, alpha, A::StridedCuVecOrMat{T}, beta::Number, B::StridedCuVecOrMat{T}) where T n = size(A, trans == 'N' ? 1 : 2) syrkx!(uplo, trans, alpha, A, B, beta, similar(A, (n, n))) @@ -1671,10 +1807,10 @@ for (fname, fname_64, elty) in ((:cublasZhemm_v2, :cublasZhemm_v2_64, :ComplexF6 @eval begin function hemm!(side::Char, uplo::Char, - alpha::Number, + alpha, A::StridedCuMatrix{$elty}, B::StridedCuMatrix{$elty}, - beta::Number, + beta, C::StridedCuMatrix{$elty}) mA, nA = size(A) m, n = size(B) @@ -1695,7 +1831,7 @@ for (fname, fname_64, elty) in ((:cublasZhemm_v2, :cublasZhemm_v2_64, :ComplexF6 end end end -function hemm(uplo::Char, trans::Char, alpha::Number, +function hemm(uplo::Char, trans::Char, alpha, A::StridedCuMatrix{T}, B::StridedCuMatrix{T}) where T m,n = size(B) hemm!( uplo, trans, alpha, A, B, zero(T), similar(B, (m,n) ) ) @@ -1705,14 +1841,16 @@ function hemm(uplo::Char, trans::Char, A::StridedCuMatrix{T}, B::StridedCuMatrix end ## herk -for (fname, fname_64, elty) in ((:cublasZherk_v2, :cublasZherk_v2_64, :ComplexF64), - (:cublasCherk_v2, :cublasCherk_v2_64, :ComplexF32)) +for (fname, fname_64, elty, relty) in ( + (:cublasZherk_v2, :cublasZherk_v2_64, :ComplexF64, 
:Float64), + (:cublasCherk_v2, :cublasCherk_v2_64, :ComplexF32, :Float32), + ) @eval begin function herk!(uplo::Char, trans::Char, - alpha::Real, + alpha, A::StridedCuVecOrMat{$elty}, - beta::Real, + beta, C::StridedCuMatrix{$elty}) mC, n = size(C) if mC != n throw(DimensionMismatch("C must be square")) end @@ -1730,7 +1868,7 @@ for (fname, fname_64, elty) in ((:cublasZherk_v2, :cublasZherk_v2_64, :ComplexF6 end end end -function herk(uplo::Char, trans::Char, alpha::Real, A::StridedCuVecOrMat{T}) where T +function herk(uplo::Char, trans::Char, alpha, A::StridedCuVecOrMat{T}) where T n = size(A, trans == 'N' ? 1 : 2) herk!(uplo, trans, alpha, A, zero(real(T)), similar(A, (n,n))) end @@ -1746,10 +1884,10 @@ for (fname, fname_64, elty) in ((:cublasDsyr2k_v2, :cublasDsyr2k_v2_64, :Float64 @eval begin function syr2k!(uplo::Char, trans::Char, - alpha::Number, + alpha, A::StridedCuVecOrMat{$elty}, B::StridedCuVecOrMat{$elty}, - beta::Number, + beta, C::StridedCuMatrix{$elty}) # TODO: check size of B in julia (syr2k!) m, n = size(C) @@ -1775,27 +1913,29 @@ for (fname, fname_64, elty) in ((:cublasDsyr2k_v2, :cublasDsyr2k_v2_64, :Float64 end function syr2k(uplo::Char, trans::Char, - alpha::Number, + alpha, A::StridedCuVecOrMat, B::StridedCuVecOrMat) T = eltype(A) n = size(A, trans == 'N' ? 
1 : 2) - syr2k!(uplo, trans, convert(T,alpha), A, B, zero(T), similar(A, T, (n, n))) + syr2k!(uplo, trans, alpha, A, B, zero(T), similar(A, T, (n, n))) end function syr2k(uplo::Char, trans::Char, A::StridedCuVecOrMat, B::StridedCuVecOrMat) syr2k(uplo, trans, one(eltype(A)), A, B) end ## her2k -for (fname, fname_64, elty) in ((:cublasZher2k_v2, :cublasZher2k_v2_64, :ComplexF64), - (:cublasCher2k_v2, :cublasCher2k_v2_64, :ComplexF32)) +for (fname, fname_64, elty, relty) in ( + (:cublasZher2k_v2, :cublasZher2k_v2_64, :ComplexF64, :Float64), + (:cublasCher2k_v2, :cublasCher2k_v2_64, :ComplexF32, :Float32), + ) @eval begin function her2k!(uplo::Char, trans::Char, - alpha::Number, + alpha, A::StridedCuVecOrMat{$elty}, B::StridedCuVecOrMat{$elty}, - beta::Real, + beta, C::StridedCuMatrix{$elty}) # TODO: check size of B in julia (her2k!) m, n = size(C) @@ -1818,13 +1958,18 @@ for (fname, fname_64, elty) in ((:cublasZher2k_v2, :cublasZher2k_v2_64, :Complex end C end + function her2k( + uplo::Char, + trans::Char, + alpha, + A::StridedCuVecOrMat{$elty}, + B::StridedCuVecOrMat{$elty}, + ) + n = size(A, trans == 'N' ? 1 : 2) + return her2k!(uplo, trans, alpha, A, B, zero($relty), similar(A, (n, n))) + end end end -function her2k(uplo::Char, trans::Char, alpha::Number, - A::StridedCuVecOrMat{T}, B::StridedCuVecOrMat{T}) where T - n = size(A, trans == 'N' ? 
1 : 2) - her2k!(uplo, trans, alpha, A, B, zero(real(T)), similar(A, (n,n))) -end function her2k(uplo::Char, trans::Char, A::StridedCuVecOrMat{T}, B::StridedCuVecOrMat{T}) where T her2k(uplo, trans, one(T), A, B) @@ -1844,7 +1989,7 @@ for (mmname, smname, elty) in uplo::Char, transa::Char, diag::Char, - alpha::Number, + alpha, A::StridedCuMatrix{$elty}, B::StridedCuMatrix{$elty}, C::StridedCuMatrix{$elty}) @@ -1866,7 +2011,7 @@ for (mmname, smname, elty) in uplo::Char, transa::Char, diag::Char, - alpha::Number, + alpha, A::StridedCuMatrix{$elty}, B::StridedCuMatrix{$elty}) m, n = size(B) @@ -1881,11 +2026,13 @@ for (mmname, smname, elty) in end end end -function trmm(side::Char, uplo::Char, transa::Char, diag::Char, alpha::Number, +function trmm( + side::Char, uplo::Char, transa::Char, diag::Char, alpha, A::StridedCuMatrix{T}, B::StridedCuMatrix{T}) where T trmm!(side, uplo, transa, diag, alpha, A, B, similar(B)) end -function trsm(side::Char, uplo::Char, transa::Char, diag::Char,alpha::Number, +function trsm( + side::Char, uplo::Char, transa::Char, diag::Char, alpha, A::StridedCuMatrix{T}, B::StridedCuMatrix{T}) where T trsm!(side, uplo, transa, diag, alpha, A, copy(B)) end @@ -1900,7 +2047,7 @@ for (fname, fname_64, elty) in ((:cublasDtrsmBatched, :cublasDtrsmBatched_64, :F uplo::Char, transa::Char, diag::Char, - alpha::Number, + alpha, A::Vector{<:StridedCuMatrix{$elty}}, B::Vector{<:StridedCuMatrix{$elty}}) if length(A) != length(B) @@ -1930,7 +2077,7 @@ for (fname, fname_64, elty) in ((:cublasDtrsmBatched, :cublasDtrsmBatched_64, :F end end end -function trsm_batched(side::Char, uplo::Char, transa::Char, diag::Char, alpha::Number, +function trsm_batched(side::Char, uplo::Char, transa::Char, diag::Char, alpha, A::Vector{<:StridedCuMatrix{T}}, B::Vector{<:StridedCuMatrix{T}}) where T trsm_batched!(side, uplo, transa, diag, alpha, A, copy(B) ) end @@ -1947,9 +2094,9 @@ for (fname, fname_64, elty) in ((:cublasDgeam, :cublasDgeam_64, :Float64), @eval begin function 
geam!(transa::Char, transb::Char, - alpha::Number, + alpha, A::StridedCuMatrix{$elty}, - beta::Number, + beta, B::StridedCuMatrix{$elty}, C::StridedCuMatrix{$elty}) mA, nA = size(A) @@ -1971,8 +2118,10 @@ for (fname, fname_64, elty) in ((:cublasDgeam, :cublasDgeam_64, :Float64), end end end -function geam(transa::Char, transb::Char, alpha::Number, A::StridedCuMatrix{T}, - beta::Number, B::StridedCuMatrix{T}) where T +function geam( + transa::Char, transb::Char, alpha, A::StridedCuMatrix{T}, + beta, B::StridedCuMatrix{T} + ) where {T} m,n = size(B) if transb == 'T' || transb == 'C' geam!(transa, transb, alpha, A, beta, B, similar(B, (n,m) ) ) @@ -2170,8 +2319,8 @@ for (fname, elty) in ((:cublasDgetriBatched, :Float64), end function getri_batched!(n, Aptrs::CuVector{CuPtr{$elty}}, - lda, Cptrs::CuVector{CuPtr{$elty}},ldc, - pivotArray::CuArray{Cint}) + lda, Cptrs::CuVector{CuPtr{$elty}},ldc, + pivotArray::CuArray{Cint}) batchSize = length(Aptrs) info = CuArray{Cint}(undef, batchSize) $fname(handle(), n, Aptrs, lda, pivotArray, Cptrs, ldc, info, batchSize) diff --git a/res/wrap/cublas.toml b/res/wrap/cublas.toml index 3b1bf2e8d9..3f05cebfeb 100644 --- a/res/wrap/cublas.toml +++ b/res/wrap/cublas.toml @@ -88,15 +88,15 @@ needs_context = false [api."cublas𝕏nrm2_v2".argtypes] 3 = "CuPtr{T}" -5 = "RefOrCuRef{T}" +5 = "CuRef{T}" [api.cublasScnrm2_v2.argtypes] 3 = "CuPtr{cuComplex}" -5 = "RefOrCuRef{Cfloat}" +5 = "CuRef{Cfloat}" [api.cublasDznrm2_v2.argtypes] 3 = "CuPtr{cuDoubleComplex}" -5 = "RefOrCuRef{Cdouble}" +5 = "CuRef{Cdouble}" [api.cublasDotEx.argtypes] 3 = "CuPtr{Cvoid}" @@ -111,32 +111,32 @@ needs_context = false [api."cublas𝕏dot_v2".argtypes] 3 = "CuPtr{T}" 5 = "CuPtr{T}" -7 = "RefOrCuRef{T}" +7 = "CuRef{T}" [api."cublas𝕏dotu_v2".argtypes] 3 = "CuPtr{T}" 5 = "CuPtr{T}" -7 = "RefOrCuRef{T}" +7 = "CuRef{T}" [api."cublas𝕏dotc_v2".argtypes] 3 = "CuPtr{T}" 5 = "CuPtr{T}" -7 = "RefOrCuRef{T}" +7 = "CuRef{T}" [api.cublasScalEx.argtypes] 3 = "PtrOrCuPtr{Cvoid}" 5 = 
"CuPtr{Cvoid}" [api."cublas𝕏scal_v2".argtypes] -3 = "RefOrCuRef{T}" +3 = "CuRef{T}" 4 = "CuPtr{T}" [api.cublasCsscal_v2.argtypes] -3 = "RefOrCuRef{Cfloat}" +3 = "CuRef{Cfloat}" 4 = "CuPtr{cuComplex}" [api.cublasZdscal_v2.argtypes] -3 = "RefOrCuRef{Cdouble}" +3 = "CuRef{Cdouble}" 4 = "CuPtr{cuDoubleComplex}" [api.cublasAxpyEx.argtypes] @@ -145,7 +145,7 @@ needs_context = false 8 = "CuPtr{Cvoid}" [api."cublas𝕏axpy_v2".argtypes] -3 = "RefOrCuRef{T}" +3 = "CuRef{T}" 4 = "CuPtr{T}" 6 = "CuPtr{T}" @@ -167,43 +167,43 @@ needs_context = false [api.cublasIsamax_v2.argtypes] 3 = "CuPtr{Cfloat}" -5 = "RefOrCuRef{Cint}" +5 = "CuRef{Cint}" [api.cublasIdamax_v2.argtypes] 3 = "CuPtr{Cdouble}" -5 = "RefOrCuRef{Cint}" +5 = "CuRef{Cint}" [api.cublasIcamax_v2.argtypes] 3 = "CuPtr{cuComplex}" -5 = "RefOrCuRef{Cint}" +5 = "CuRef{Cint}" [api.cublasIzamax_v2.argtypes] 3 = "CuPtr{cuDoubleComplex}" -5 = "RefOrCuRef{Cint}" +5 = "CuRef{Cint}" [api.cublasIamaxEx.argtypes] 3 = "CuPtr{Cvoid}" -6 = "RefOrCuRef{Cint}" +6 = "CuRef{Cint}" [api.cublasIsamin_v2.argtypes] 3 = "CuPtr{Cfloat}" -5 = "RefOrCuRef{Cint}" +5 = "CuRef{Cint}" [api.cublasIdamin_v2.argtypes] 3 = "CuPtr{Cdouble}" -5 = "RefOrCuRef{Cint}" +5 = "CuRef{Cint}" [api.cublasIcamin_v2.argtypes] 3 = "CuPtr{cuComplex}" -5 = "RefOrCuRef{Cint}" +5 = "CuRef{Cint}" [api.cublasIzamin_v2.argtypes] 3 = "CuPtr{cuDoubleComplex}" -5 = "RefOrCuRef{Cint}" +5 = "CuRef{Cint}" [api.cublasIaminEx.argtypes] 3 = "CuPtr{Cvoid}" -6 = "RefOrCuRef{Cint}" +6 = "CuRef{Cint}" [api.cublasAsumEx.argtypes] 3 = "CuPtr{Cvoid}" @@ -211,33 +211,33 @@ needs_context = false [api."cublas𝕏asum_v2".argtypes] 3 = "CuPtr{T}" -5 = "RefOrCuRef{T}" +5 = "CuRef{T}" [api.cublasScasum_v2.argtypes] 3 = "CuPtr{cuComplex}" -5 = "RefOrCuRef{Cfloat}" +5 = "CuRef{Cfloat}" [api.cublasDzasum_v2.argtypes] 3 = "CuPtr{cuDoubleComplex}" -5 = "RefOrCuRef{Cdouble}" +5 = "CuRef{Cdouble}" [api."cublas𝕏rot_v2".argtypes] 3 = "CuPtr{T}" 5 = "CuPtr{T}" -7 = "RefOrCuRef{S}" -8 = "RefOrCuRef{T}" +7 = 
"CuRef{S}" +8 = "CuRef{T}" [api.cublasCsrot_v2.argtypes] 3 = "CuPtr{cuComplex}" 5 = "CuPtr{cuComplex}" -7 = "RefOrCuRef{Cfloat}" -8 = "RefOrCuRef{Cfloat}" +7 = "CuRef{Cfloat}" +8 = "CuRef{Cfloat}" [api.cublasZdrot_v2.argtypes] 3 = "CuPtr{cuDoubleComplex}" 5 = "CuPtr{cuDoubleComplex}" -7 = "RefOrCuRef{Cdouble}" -8 = "RefOrCuRef{Cdouble}" +7 = "CuRef{Cdouble}" +8 = "CuRef{Cdouble}" [api.cublasRotEx.argtypes] 3 = "CuPtr{Cvoid}" @@ -246,8 +246,8 @@ needs_context = false 10 = "PtrOrCuPtr{Cvoid}" [api."cublas𝕏rotg_v2".argtypes] -2 = "RefOrCuRef{T}" -3 = "RefOrCuRef{T}" +2 = "CuRef{T}" +3 = "CuRef{T}" 4 = "PtrOrCuPtr{S}" 5 = "PtrOrCuPtr{T}" @@ -266,10 +266,10 @@ needs_context = false 9 = "PtrOrCuPtr{Cvoid}" [api."cublas𝕏rotmg_v2".argtypes] -2 = "RefOrCuRef{T}" -3 = "RefOrCuRef{T}" -4 = "RefOrCuRef{T}" -5 = "RefOrCuRef{T}" +2 = "CuRef{T}" +3 = "CuRef{T}" +4 = "CuRef{T}" +5 = "CuRef{T}" 6 = "PtrOrCuPtr{T}" [api.cublasRotmgEx.argtypes] @@ -280,17 +280,17 @@ needs_context = false 10 = "PtrOrCuPtr{Cvoid}" [api."cublas𝕏gemv_v2".argtypes] -5 = "RefOrCuRef{T}" +5 = "CuRef{T}" 6 = "CuPtr{T}" 8 = "CuPtr{T}" -10 = "RefOrCuRef{T}" +10 = "CuRef{T}" 11 = "CuPtr{T}" [api."cublas𝕏gbmv_v2".argtypes] -7 = "RefOrCuRef{T}" +7 = "CuRef{T}" 8 = "CuPtr{T}" 10 = "CuPtr{T}" -12 = "RefOrCuRef{T}" +12 = "CuRef{T}" 13 = "CuPtr{T}" [api."cublas𝕏trmv_v2".argtypes] @@ -306,52 +306,52 @@ needs_context = false 7 = "CuPtr{T}" [api."cublas𝕏gemvBatched".argtypes] -5 = "RefOrCuRef{T}" +5 = "CuRef{T}" 6 = "CuPtr{Ptr{T}}" 8 = "CuPtr{Ptr{T}}" -10 = "RefOrCuRef{T}" +10 = "CuRef{T}" 11 = "CuPtr{Ptr{T}}" [api.cublasHSHgemvBatched.argtypes] -5 = "RefOrCuRef{Cfloat}" +5 = "CuRef{Cfloat}" 6 = "CuPtr{Ptr{Float16}}" 8 = "CuPtr{Ptr{Float16}}" -10 = "RefOrCuRef{Cfloat}" +10 = "CuRef{Cfloat}" 11 = "CuPtr{Ptr{Float16}}" [api.cublasHSSgemvBatched.argtypes] -5 = "RefOrCuRef{Cfloat}" +5 = "CuRef{Cfloat}" 6 = "CuPtr{Ptr{Float16}}" 8 = "CuPtr{Ptr{Float16}}" -10 = "RefOrCuRef{Cfloat}" +10 = "CuRef{Cfloat}" 11 = 
"CuPtr{Ptr{Cfloat}}" [api."cublas𝕏gemvStridedBatched".argtypes] -5 = "RefOrCuRef{T}" +5 = "CuRef{T}" 6 = "CuPtr{T}" 9 = "CuPtr{T}" -12 = "RefOrCuRef{T}" +12 = "CuRef{T}" 13 = "CuPtr{T}" [api.cublasHSSgemvStridedBatched.argtypes] -5 = "RefOrCuRef{Cfloat}" +5 = "CuRef{Cfloat}" 6 = "CuPtr{Float16}" 9 = "CuPtr{Float16}" -12 = "RefOrCuRef{Cfloat}" +12 = "CuRef{Cfloat}" 13 = "CuPtr{Cfloat}" [api.cublasTSTgemvStridedBatched.argtypes] -5 = "RefOrCuRef{Cfloat}" +5 = "CuRef{Cfloat}" 6 = "CuPtr{BFloat16}" 9 = "CuPtr{BFloat16}" -12 = "RefOrCuRef{Cfloat}" +12 = "CuRef{Cfloat}" 13 = "CuPtr{BFloat16}" [api.cublasTSSgemvStridedBatched.argtypes] -5 = "RefOrCuRef{Cfloat}" +5 = "CuRef{Cfloat}" 6 = "CuPtr{BFloat16}" 9 = "CuPtr{BFloat16}" -12 = "RefOrCuRef{Cfloat}" +12 = "CuRef{Cfloat}" 13 = "CuPtr{Cfloat}" [api."cublas𝕏trsv_v2".argtypes] @@ -367,135 +367,135 @@ needs_context = false 9 = "CuPtr{T}" [api."cublas𝕏symv_v2".argtypes] -4 = "RefOrCuRef{T}" +4 = "CuRef{T}" 5 = "CuPtr{T}" 7 = "CuPtr{T}" -9 = "RefOrCuRef{T}" +9 = "CuRef{T}" 10 = "CuPtr{T}" [api."cublas𝕏hemv_v2".argtypes] -4 = "RefOrCuRef{T}" +4 = "CuRef{T}" 5 = "CuPtr{T}" 7 = "CuPtr{T}" -9 = "RefOrCuRef{T}" +9 = "CuRef{T}" 10 = "CuPtr{T}" [api."cublas𝕏sbmv_v2".argtypes] -5 = "RefOrCuRef{T}" +5 = "CuRef{T}" 6 = "CuPtr{T}" 8 = "CuPtr{T}" -10 = "RefOrCuRef{T}" +10 = "CuRef{T}" 11 = "CuPtr{T}" [api."cublas𝕏hbmv_v2".argtypes] -5 = "RefOrCuRef{T}" +5 = "CuRef{T}" 6 = "CuPtr{T}" 8 = "CuPtr{T}" -10 = "RefOrCuRef{T}" +10 = "CuRef{T}" 11 = "CuPtr{T}" [api."cublas𝕏spmv_v2".argtypes] -4 = "RefOrCuRef{T}" +4 = "CuRef{T}" 5 = "CuPtr{T}" 6 = "CuPtr{T}" -8 = "RefOrCuRef{T}" +8 = "CuRef{T}" 9 = "CuPtr{T}" [api."cublas𝕏hpmv_v2".argtypes] -4 = "RefOrCuRef{T}" +4 = "CuRef{T}" 5 = "CuPtr{T}" 6 = "CuPtr{T}" -8 = "RefOrCuRef{T}" +8 = "CuRef{T}" 9 = "CuPtr{T}" [api."cublas𝕏ger_v2".argtypes] -4 = "RefOrCuRef{T}" +4 = "CuRef{T}" 5 = "CuPtr{T}" 7 = "CuPtr{T}" 9 = "CuPtr{T}" [api."cublas𝕏geru_v2".argtypes] -4 = "RefOrCuRef{T}" +4 = "CuRef{T}" 5 = 
"CuPtr{T}" 7 = "CuPtr{T}" 9 = "CuPtr{T}" [api."cublas𝕏gerc_v2".argtypes] -4 = "RefOrCuRef{T}" +4 = "CuRef{T}" 5 = "CuPtr{T}" 7 = "CuPtr{T}" 9 = "CuPtr{T}" [api."cublas𝕏syr_v2".argtypes] -4 = "RefOrCuRef{T}" +4 = "CuRef{T}" 5 = "CuPtr{T}" 7 = "CuPtr{T}" [api."cublas𝕏her_v2".argtypes] -4 = "RefOrCuRef{S}" +4 = "CuRef{S}" 5 = "CuPtr{T}" 7 = "CuPtr{T}" [api."cublas𝕏spr_v2".argtypes] -4 = "RefOrCuRef{T}" +4 = "CuRef{T}" 5 = "CuPtr{T}" 7 = "CuPtr{T}" [api."cublas𝕏hpr_v2".argtypes] -4 = "RefOrCuRef{S}" +4 = "CuRef{S}" 5 = "CuPtr{T}" 7 = "CuPtr{T}" [api."cublas𝕏syr2_v2".argtypes] -4 = "RefOrCuRef{T}" +4 = "CuRef{T}" 5 = "CuPtr{T}" 7 = "CuPtr{T}" 9 = "CuPtr{T}" [api."cublas𝕏her2_v2".argtypes] -4 = "RefOrCuRef{T}" +4 = "CuRef{T}" 5 = "CuPtr{T}" 7 = "CuPtr{T}" 9 = "CuPtr{T}" [api."cublas𝕏spr2_v2".argtypes] -4 = "RefOrCuRef{T}" +4 = "CuRef{T}" 5 = "CuPtr{T}" 7 = "CuPtr{T}" 9 = "CuPtr{T}" [api."cublas𝕏hpr2_v2".argtypes] -4 = "RefOrCuRef{T}" +4 = "CuRef{T}" 5 = "CuPtr{T}" 7 = "CuPtr{T}" 9 = "CuPtr{T}" [api."cublas𝕏gemm_v2".argtypes] -7 = "RefOrCuRef{T}" +7 = "CuRef{T}" 8 = "CuPtr{T}" 10 = "CuPtr{T}" -12 = "RefOrCuRef{T}" +12 = "CuRef{T}" 13 = "CuPtr{T}" [api."cublas𝕏gemm3m".argtypes] -7 = "RefOrCuRef{T}" +7 = "CuRef{T}" 8 = "CuPtr{T}" 10 = "CuPtr{T}" -12 = "RefOrCuRef{T}" +12 = "CuRef{T}" 13 = "CuPtr{T}" [api."cublas𝕏gemm3mEx".argtypes] -7 = "RefOrCuRef{T}" +7 = "CuRef{T}" 8 = "CuPtr{Cvoid}" 11 = "CuPtr{Cvoid}" -14 = "RefOrCuRef{T}" +14 = "CuRef{T}" 15 = "CuPtr{Cvoid}" [api."cublas𝕏gemmEx".argtypes] -7 = "RefOrCuRef{T}" +7 = "CuRef{T}" 8 = "CuPtr{Cvoid}" 11 = "CuPtr{Cvoid}" -14 = "RefOrCuRef{T}" +14 = "CuRef{T}" 15 = "CuPtr{Cvoid}" [api.cublasGemmEx.argtypes] @@ -511,106 +511,106 @@ needs_context = false 14 = "CuPtr{Cuchar}" [api."cublas𝕏syrk_v2".argtypes] -6 = "RefOrCuRef{T}" +6 = "CuRef{T}" 7 = "CuPtr{T}" -9 = "RefOrCuRef{T}" +9 = "CuRef{T}" 10 = "CuPtr{T}" [api."cublas𝕏syrkEx".argtypes] -6 = "RefOrCuRef{T}" +6 = "CuRef{T}" 7 = "CuPtr{Cvoid}" -10 = "RefOrCuRef{T}" +10 = 
"CuRef{T}" 11 = "CuPtr{Cvoid}" [api."cublas𝕏syrk3mEx".argtypes] -6 = "RefOrCuRef{T}" +6 = "CuRef{T}" 7 = "CuPtr{Cvoid}" -10 = "RefOrCuRef{T}" +10 = "CuRef{T}" 11 = "CuPtr{Cvoid}" [api."cublas𝕏herk_v2".argtypes] -6 = "RefOrCuRef{S}" +6 = "CuRef{S}" 7 = "CuPtr{T}" -9 = "RefOrCuRef{S}" +9 = "CuRef{S}" 10 = "CuPtr{T}" [api."cublas𝕏herkEx".argtypes] -6 = "RefOrCuRef{S}" +6 = "CuRef{S}" 7 = "CuPtr{Cvoid}" -10 = "RefOrCuRef{S}" +10 = "CuRef{S}" 11 = "CuPtr{Cvoid}" [api."cublas𝕏herk3mEx".argtypes] -6 = "RefOrCuRef{S}" +6 = "CuRef{S}" 7 = "CuPtr{Cvoid}" -10 = "RefOrCuRef{S}" +10 = "CuRef{S}" 11 = "CuPtr{Cvoid}" [api."cublas𝕏syr2k_v2".argtypes] -6 = "RefOrCuRef{T}" +6 = "CuRef{T}" 7 = "CuPtr{T}" 9 = "CuPtr{T}" -11 = "RefOrCuRef{T}" +11 = "CuRef{T}" 12 = "CuPtr{T}" [api."cublas𝕏her2k_v2".argtypes] -6 = "RefOrCuRef{T}" +6 = "CuRef{T}" 7 = "CuPtr{T}" 9 = "CuPtr{T}" -11 = "RefOrCuRef{S}" +11 = "CuRef{S}" 12 = "CuPtr{T}" [api."cublas𝕏syrkx".argtypes] -6 = "RefOrCuRef{T}" +6 = "CuRef{T}" 7 = "CuPtr{T}" 9 = "CuPtr{T}" -11 = "RefOrCuRef{T}" +11 = "CuRef{T}" 12 = "CuPtr{T}" [api."cublas𝕏herkx".argtypes] -6 = "RefOrCuRef{T}" +6 = "CuRef{T}" 7 = "CuPtr{T}" 9 = "CuPtr{T}" -11 = "RefOrCuRef{S}" +11 = "CuRef{S}" 12 = "CuPtr{T}" [api."cublas𝕏symm_v2".argtypes] -6 = "RefOrCuRef{T}" +6 = "CuRef{T}" 7 = "CuPtr{T}" 9 = "CuPtr{T}" -11 = "RefOrCuRef{T}" +11 = "CuRef{T}" 12 = "CuPtr{T}" [api."cublas𝕏hemm_v2".argtypes] -6 = "RefOrCuRef{T}" +6 = "CuRef{T}" 7 = "CuPtr{T}" 9 = "CuPtr{T}" -11 = "RefOrCuRef{T}" +11 = "CuRef{T}" 12 = "CuPtr{T}" [api."cublas𝕏trsm_v2".argtypes] -8 = "RefOrCuRef{T}" +8 = "CuRef{T}" 9 = "CuPtr{T}" 11 = "CuPtr{T}" [api."cublas𝕏trmm_v2".argtypes] -8 = "RefOrCuRef{T}" +8 = "CuRef{T}" 9 = "CuPtr{T}" 11 = "CuPtr{T}" 13 = "CuPtr{T}" [api."cublas𝕏gemmBatched".argtypes] -7 = "RefOrCuRef{T}" +7 = "CuRef{T}" 8 = "CuPtr{Ptr{T}}" 10 = "CuPtr{Ptr{T}}" -12 = "RefOrCuRef{T}" +12 = "CuRef{T}" 13 = "CuPtr{Ptr{T}}" [api."cublas𝕏gemm3mBatched".argtypes] -7 = "RefOrCuRef{T}" +7 = "CuRef{T}" 8 
= "CuPtr{Ptr{T}}" 10 = "CuPtr{Ptr{T}}" -12 = "RefOrCuRef{T}" +12 = "CuRef{T}" 13 = "CuPtr{Ptr{T}}" [api.cublasGemmBatchedEx.argtypes] @@ -628,23 +628,23 @@ needs_context = false 17 = "CuPtr{Cvoid}" [api."cublas𝕏gemmStridedBatched".argtypes] -7 = "RefOrCuRef{T}" +7 = "CuRef{T}" 8 = "CuPtr{T}" 11 = "CuPtr{T}" -14 = "RefOrCuRef{T}" +14 = "CuRef{T}" 15 = "CuPtr{T}" [api."cublas𝕏gemm3mStridedBatched".argtypes] -7 = "RefOrCuRef{T}" +7 = "CuRef{T}" 8 = "CuPtr{T}" 11 = "CuPtr{T}" -14 = "RefOrCuRef{T}" +14 = "CuRef{T}" 15 = "CuPtr{T}" [api."cublas𝕏geam".argtypes] -6 = "RefOrCuRef{T}" +6 = "CuRef{T}" 7 = "CuPtr{T}" -9 = "RefOrCuRef{T}" +9 = "CuRef{T}" 10 = "CuPtr{T}" 12 = "CuPtr{T}" @@ -665,7 +665,7 @@ needs_context = false 8 = "CuPtr{Ptr{T}}" [api."cublas𝕏trsmBatched".argtypes] -8 = "RefOrCuRef{T}" +8 = "CuRef{T}" 9 = "CuPtr{Ptr{T}}" 11 = "CuPtr{Ptr{T}}" @@ -697,69 +697,69 @@ needs_context = false 6 = "CuPtr{T}" [api."cublasXt𝕏gemm".argtypes] -7 = "RefOrCuRef{T}" +7 = "Ref{T}" 8 = "PtrOrCuPtr{T}" 10 = "PtrOrCuPtr{T}" -12 = "RefOrCuRef{T}" +12 = "Ref{T}" 13 = "PtrOrCuPtr{T}" [api."cublasXt𝕏syrk".argtypes] -6 = "RefOrCuRef{T}" +6 = "Ref{T}" 7 = "PtrOrCuPtr{T}" -9 = "RefOrCuRef{T}" +9 = "Ref{T}" 10 = "PtrOrCuPtr{T}" [api."cublasXt𝕏herk".argtypes] -6 = "RefOrCuRef{S}" +6 = "Ref{S}" 7 = "PtrOrCuPtr{T}" -9 = "RefOrCuRef{S}" +9 = "Ref{S}" 10 = "PtrOrCuPtr{T}" [api."cublasXt𝕏syr2k".argtypes] -6 = "RefOrCuRef{T}" +6 = "Ref{T}" 7 = "PtrOrCuPtr{T}" 9 = "PtrOrCuPtr{T}" -11 = "RefOrCuRef{T}" +11 = "Ref{T}" 12 = "PtrOrCuPtr{T}" [api."cublasXt𝕏herkx".argtypes] -6 = "RefOrCuRef{T}" +6 = "Ref{T}" 7 = "PtrOrCuPtr{T}" 9 = "PtrOrCuPtr{T}" -11 = "RefOrCuRef{S}" +11 = "Ref{S}" 12 = "PtrOrCuPtr{T}" [api."cublasXt𝕏trsm".argtypes] -8 = "RefOrCuRef{T}" +8 = "Ref{T}" 9 = "PtrOrCuPtr{T}" 11 = "PtrOrCuPtr{T}" [api."cublasXt𝕏symm".argtypes] -6 = "RefOrCuRef{T}" +6 = "Ref{T}" 7 = "PtrOrCuPtr{T}" 9 = "PtrOrCuPtr{T}" -11 = "RefOrCuRef{T}" +11 = "Ref{T}" 12 = "PtrOrCuPtr{T}" [api."cublasXt𝕏hemm".argtypes] 
-6 = "RefOrCuRef{T}" +6 = "Ref{T}" 7 = "PtrOrCuPtr{T}" 9 = "PtrOrCuPtr{T}" -11 = "RefOrCuRef{T}" +11 = "Ref{T}" 12 = "PtrOrCuPtr{T}" [api."cublasXt𝕏syrkx".argtypes] -6 = "RefOrCuRef{T}" +6 = "Ref{T}" 7 = "PtrOrCuPtr{T}" 9 = "PtrOrCuPtr{T}" -11 = "RefOrCuRef{T}" +11 = "Ref{T}" 12 = "PtrOrCuPtr{T}" [api."cublasXt𝕏her2k".argtypes] -6 = "RefOrCuRef{T}" +6 = "Ref{T}" 7 = "PtrOrCuPtr{T}" 9 = "PtrOrCuPtr{T}" -11 = "RefOrCuRef{S}" +11 = "Ref{S}" 12 = "PtrOrCuPtr{T}" [api."cublasXt𝕏spmm".argtypes] @@ -769,7 +769,7 @@ needs_context = false 11 = "PtrOrCuPtr{T}" [api."cublasXt𝕏trmm".argtypes] -8 = "RefOrCuRef{T}" +8 = "Ref{T}" 9 = "PtrOrCuPtr{T}" 11 = "PtrOrCuPtr{T}" 13 = "PtrOrCuPtr{T}" diff --git a/src/pointer.jl b/src/pointer.jl index c09b227863..865bd54d01 100644 --- a/src/pointer.jl +++ b/src/pointer.jl @@ -1,6 +1,6 @@ # CUDA pointer types -export CuPtr, CU_NULL, PtrOrCuPtr, CuArrayPtr, CuRef, RefOrCuRef +export CuPtr, CU_NULL, PtrOrCuPtr, CuArrayPtr, CuRef # @@ -224,6 +224,8 @@ Base.convert(::Type{CuRef{T}}, x::CuRef{T}) where {T} = x # conversion or the actual ccall Base.unsafe_convert(::Type{CuRef{T}}, x::CuRef{T}) where {T} = Base.bitcast(CuRef{T}, Base.unsafe_convert(CuPtr{T}, x)) Base.unsafe_convert(::Type{CuRef{T}}, x) where {T} = Base.bitcast(CuRef{T}, Base.unsafe_convert(CuPtr{T}, x)) +## `@gcsafe_ccall` results in "double conversions" (remove this once `ccall` does `gcsafe`) +Base.unsafe_convert(::Type{CuPtr{T}}, x::CuRef{T}) where {T} = x # CuRef from literal pointer Base.convert(::Type{CuRef{T}}, x::CuPtr{T}) where {T} = x @@ -245,6 +247,7 @@ end CuRefArray{T}(x::AbstractArray{T}, i::Int=1) where {T} = CuRefArray{T,typeof(x)}(x, i) CuRefArray(x::AbstractArray{T}, i::Int=1) where {T} = CuRefArray{T}(x, i) Base.convert(::Type{CuRef{T}}, x::AbstractArray{T}) where {T} = CuRefArray(x, 1) +Base.convert(::Type{CuRef{T}}, x::CuRefArray{T}) where {T} = x function Base.unsafe_convert(P::Type{CuPtr{T}}, b::CuRefArray{T}) where T return pointer(b.x, b.i) @@ -255,39 
+258,17 @@ end Base.unsafe_convert(::Type{CuPtr{Cvoid}}, b::CuRefArray{T}) where {T} = convert(CuPtr{Cvoid}, Base.unsafe_convert(CuPtr{T}, b)) - -## Union with all CuRef 'subtypes' - -const CuRefs{T} = Union{CuPtr{T}, CuRefArray{T}} - - -## RefOrCuRef - -if sizeof(Ptr{Cvoid}) == 8 - primitive type RefOrCuRef{T} 64 end -else - primitive type RefOrCuRef{T} 32 end +function Base.getindex(gpu::CuRefArray{T}) where {T} + cpu = Ref{T}() + GC.@preserve cpu begin + cpu_ptr = Base.unsafe_convert(Ptr{T}, cpu) + gpu_ptr = pointer(gpu.x, gpu.i) + unsafe_copyto!(cpu_ptr, gpu_ptr, 1) + end + cpu[] end -Base.convert(::Type{RefOrCuRef{T}}, x::Union{RefOrCuRef{T}, Ref{T}, CuRef{T}, CuRefs{T}}) where {T} = x - -# prefer conversion to CPU ref: this is generally cheaper -Base.convert(::Type{RefOrCuRef{T}}, x) where {T} = Ref{T}(x) -Base.unsafe_convert(::Type{RefOrCuRef{T}}, x::Ref{T}) where {T} = - Base.bitcast(RefOrCuRef{T}, Base.unsafe_convert(Ptr{T}, x)) -Base.unsafe_convert(::Type{RefOrCuRef{T}}, x) where {T} = - Base.bitcast(RefOrCuRef{T}, Base.unsafe_convert(Ptr{T}, x)) - -# support conversion from GPU ref -Base.unsafe_convert(::Type{RefOrCuRef{T}}, x::CuRefs{T}) where {T} = - Base.bitcast(RefOrCuRef{T}, Base.unsafe_convert(CuPtr{T}, x)) -# support conversion from arrays -Base.convert(::Type{RefOrCuRef{T}}, x::Array{T}) where {T} = convert(Ref{T}, x) -Base.convert(::Type{RefOrCuRef{T}}, x::AbstractArray{T}) where {T} = convert(CuRef{T}, x) -Base.unsafe_convert(P::Type{RefOrCuRef{T}}, b::CuRefArray{T}) where T = - Base.bitcast(RefOrCuRef{T}, Base.unsafe_convert(CuRef{T}, b)) +## Union with all CuRef 'subtypes' -# avoid ambiguities when passing RefOrCuRef instances -# NOTE: this happens now with `@gcsafe_ccall` due to the double `ccall` -Base.unsafe_convert(::Type{RefOrCuRef{T}}, x::RefOrCuRef{T}) where {T} = x +const CuRefs{T} = Union{CuPtr{T}, CuRefArray{T}} diff --git a/test/base/exceptions.jl b/test/base/exceptions.jl index 8b1566b368..96f08ca436 100644 --- 
a/test/base/exceptions.jl +++ b/test/base/exceptions.jl @@ -1,6 +1,3 @@ -# these tests spawn subprocesses, so reset the current context to conserve memory -device_reset!() - host_error_re = r"ERROR: (KernelException: exception thrown during kernel execution on device|CUDA error: an illegal instruction was encountered|CUDA error: unspecified launch failure)" device_error_re = r"ERROR: a \w+ was thrown during kernel execution" diff --git a/test/core/pointer.jl b/test/core/pointer.jl index 3633b30459..71d36fe7c4 100644 --- a/test/core/pointer.jl +++ b/test/core/pointer.jl @@ -77,22 +77,4 @@ end cuarr = CUDA.CuArray([1]) @test Base.cconvert(CuRef{Int}, cuarr) isa CUDA.CuRefArray{Int, typeof(cuarr)} @test Base.unsafe_convert(CuRef{Int}, Base.cconvert(CuRef{Int}, cuarr)) == Base.bitcast(CuRef{Int}, pointer(cuarr)) - - - # RefOrCuRef - - @test typeof(Base.cconvert(RefOrCuRef{Int}, 1)) == typeof(Ref(1)) - @test Base.unsafe_convert(RefOrCuRef{Int}, Base.cconvert(RefOrCuRef{Int}, 1)) isa RefOrCuRef{Int} - - @test Base.cconvert(RefOrCuRef{Int}, ptr) == ptr - @test Base.unsafe_convert(RefOrCuRef{Int}, Base.cconvert(RefOrCuRef{Int}, ptr)) == Base.bitcast(RefOrCuRef{Int}, ptr) - - @test Base.cconvert(RefOrCuRef{Int}, cuptr) == cuptr - @test Base.unsafe_convert(RefOrCuRef{Int}, Base.cconvert(RefOrCuRef{Int}, cuptr)) == Base.bitcast(RefOrCuRef{Int}, cuptr) - - @test Base.cconvert(RefOrCuRef{Int}, arr) isa Base.RefArray{Int, typeof(arr)} - @test Base.unsafe_convert(RefOrCuRef{Int}, Base.cconvert(RefOrCuRef{Int}, arr)) == Base.bitcast(RefOrCuRef{Int}, pointer(arr)) - - @test Base.cconvert(RefOrCuRef{Int}, cuarr) isa CUDA.CuRefArray{Int, typeof(cuarr)} - @test Base.unsafe_convert(RefOrCuRef{Int}, Base.cconvert(RefOrCuRef{Int}, cuarr)) == Base.bitcast(RefOrCuRef{Int}, pointer(cuarr)) end diff --git a/test/libraries/cublas/level1.jl b/test/libraries/cublas/level1.jl index b7c02c7ba9..1b1b978b4f 100644 --- a/test/libraries/cublas/level1.jl +++ b/test/libraries/cublas/level1.jl @@ -20,15 
+20,15 @@ k = 13 CUBLAS.copy!(m,A,B) @test Array(A) == Array(B) - @test testf(rmul!, rand(T, 6, 9, 3), Ref(rand())) + @test testf(rmul!, rand(T, 6, 9, 3), rand()) @test testf(dot, rand(T, m), rand(T, m)) @test testf(*, transpose(rand(T, m)), rand(T, m)) @test testf(*, rand(T, m)', rand(T, m)) @test testf(norm, rand(T, m)) @test testf(BLAS.asum, rand(T, m)) - @test testf(axpy!, Ref(rand()), rand(T, m), rand(T, m)) - @test testf(LinearAlgebra.axpby!, Ref(rand()), rand(T, m), Ref(rand()), rand(T, m)) + @test testf(axpy!, rand(), rand(T, m), rand(T, m)) + @test testf(LinearAlgebra.axpby!, rand(), rand(T, m), rand(), rand(T, m)) if T <: Complex @test testf(dot, rand(T, m), rand(T, m)) x = rand(T, m) @@ -48,7 +48,7 @@ k = 13 @test testf(reflect!, rand(T, m), rand(T, m), rand(real(T)), rand(real(T))) @test testf(reflect!, rand(T, m), rand(T, m), rand(real(T)), rand(T)) end - + @testset "rotg!" begin a = rand(T) b = rand(T) @@ -62,7 +62,7 @@ k = 13 end @test c^2 + abs2(s) ≈ one(T) end - + if T <: Real H = rand(T, 2, 2) @testset "flag $flag" for (flag, flag_H) in ((T(-2), [one(T) zero(T); zero(T) one(T)]), @@ -76,7 +76,7 @@ k = 13 y = rand(T, rot_n) dx = CuArray(x) dy = CuArray(y) - dx, dy = CUBLAS.rotm!(rot_n, dx, dy, vcat(flag, H...)) + dx, dy = CUBLAS.rotm!(rot_n, dx, dy, CuArray(vcat(flag, H...))) h_x = collect(dx) h_y = collect(dy) @test h_x ≈ [x[1] * flag_H[1,1] + y[1] * flag_H[1,2]; x[2] * flag_H[1, 1] + y[2] * flag_H[1, 2]] @@ -84,36 +84,37 @@ k = 13 end end @testset "rotmg!" 
begin - param = zeros(T, 5) + gpu_param = CuArray{T}(undef, 5) x1 = rand(T) y1 = rand(T) d1 = zero(T) d2 = zero(T) x1_copy = copy(x1) y1_copy = copy(y1) - d1, d2, x1, y1, param = CUBLAS.rotmg!(d1, d2, x1, y1, param) - flag = param[1] + d1, d2, x1, y1 = CUBLAS.rotmg!(d1, d2, x1, y1, gpu_param) + cpu_param = Array(gpu_param) + flag = cpu_param[1] H = zeros(T, 2, 2) if flag == -2 - H[1, 1] = one(T) + H[1, 1] = one(T) H[1, 2] = zero(T) - H[2, 1] = zero(T) + H[2, 1] = zero(T) H[2, 2] = one(T) elseif flag == -1 - H[1, 1] = param[2] - H[1, 2] = param[3] - H[2, 1] = param[4] - H[2, 2] = param[5] + H[1, 1] = cpu_param[2] + H[1, 2] = cpu_param[3] + H[2, 1] = cpu_param[4] + H[2, 2] = cpu_param[5] elseif iszero(flag) - H[1, 1] = one(T) - H[1, 2] = param[3] - H[2, 1] = param[4] + H[1, 1] = one(T) + H[1, 2] = cpu_param[3] + H[2, 1] = cpu_param[4] H[2, 2] = one(T) elseif flag == 1 - H[1, 1] = param[2] + H[1, 1] = cpu_param[2] H[1, 2] = one(T) H[2, 1] = -one(T) - H[2, 2] = param[5] + H[2, 2] = cpu_param[5] end out = H * [(√d1) * x1_copy; (√d2) * y1_copy] @test out[2] ≈ zero(T) @@ -138,6 +139,17 @@ k = 13 ca = CuArray(a) @test BLAS.iamax(a) == CUBLAS.iamax(ca) @test CUBLAS.iamin(ca) == 3 + result_type = CUBLAS.version() >= v"12.0" ? 
Int64 : Cint + result = CuRef{result_type}(0) + result = CUBLAS.iamax(ca, result) + @test BLAS.iamax(a) == only(Array(result.x)) + end + @testset "nrm2 with result" begin + x = rand(T, m) + dx = CuArray(x) + result = CuRef{real(T)}(zero(real(T))) + result = CUBLAS.nrm2(dx, result) + @test norm(x) ≈ only(Array(result.x)) end end # level 1 testset @testset for T in [Float16, ComplexF16] @@ -146,12 +158,13 @@ k = 13 CUBLAS.copy!(m,A,B) @test Array(A) == Array(B) + @test testf(rmul!, rand(T, 6, 9, 3), rand()) @test testf(dot, rand(T, m), rand(T, m)) @test testf(*, transpose(rand(T, m)), rand(T, m)) @test testf(*, rand(T, m)', rand(T, m)) @test testf(norm, rand(T, m)) - @test testf(axpy!, Ref(rand()), rand(T, m), rand(T, m)) - @test testf(LinearAlgebra.axpby!, Ref(rand()), rand(T, m), Ref(rand()), rand(T, m)) + @test testf(axpy!, rand(), rand(T, m), rand(T, m)) + @test testf(LinearAlgebra.axpby!, rand(), rand(T, m), rand(), rand(T, m)) if T <: Complex @test testf(dot, rand(T, m), rand(T, m)) @@ -163,5 +176,5 @@ k = 13 z = dot(x, y) @test dz ≈ z end - end # level 1 testset -end + end +end # level 1 testset diff --git a/test/libraries/cublas/level2.jl b/test/libraries/cublas/level2.jl index ce65052c4b..65e6d8a922 100644 --- a/test/libraries/cublas/level2.jl +++ b/test/libraries/cublas/level2.jl @@ -39,10 +39,10 @@ k = 13 dA = CuArray(A) alpha = rand(elty) dy = CUBLAS.gemv('N', alpha, dA, dx) - hy = collect(dy) + hy = Array(dy) @test hy ≈ alpha * A * x dy = CUBLAS.gemv('N', dA, dx) - hy = collect(dy) + hy = Array(dy) @test hy ≈ A * x dy = CuArray(y) dx = CUBLAS.gemv(elty <: Real ? 'T' : 'C', alpha, dA, dy) diff --git a/test/libraries/cublas/level3.jl b/test/libraries/cublas/level3.jl index 52e93722c9..b80a06e248 100644 --- a/test/libraries/cublas/level3.jl +++ b/test/libraries/cublas/level3.jl @@ -17,6 +17,103 @@ k = 13 @testset "level 3" begin @testset for elty in [Float32, Float64, ComplexF32, ComplexF64] + @testset "trmm!" 
begin + alpha = rand(elty) + A = triu(rand(elty, m, m)) + B = rand(elty,m,n) + C = zeros(elty,m,n) + dA = CuArray(A) + dB = CuArray(B) + dC = CuArray(C) + C = alpha*A*B + CUBLAS.trmm!('L','U','N','N',alpha,dA,dB,dC) + # move to host and compare + h_C = Array(dC) + @test C ≈ h_C + end + @testset "trmm" begin + alpha = rand(elty) + A = triu(rand(elty, m, m)) + B = rand(elty,m,n) + C = zeros(elty,m,n) + dA = CuArray(A) + dB = CuArray(B) + C = alpha*A*B + d_C = CUBLAS.trmm('L','U','N','N',alpha,dA,dB) + # move to host and compare + h_C = Array(d_C) + @test C ≈ h_C + end + @testset "triangular-dense mul!" begin + A = triu(rand(elty, m, m)) + B = rand(elty,m,n) + C = zeros(elty,m,n) + + sA = rand(elty,m,m) + sA = sA + transpose(sA) + + for t in (identity, transpose, adjoint), TR in (UpperTriangular, LowerTriangular, UnitUpperTriangular, UnitLowerTriangular) + A = copy(sA) |> TR + B_L = copy(B) + B_R = copy(B') + C_L = copy(C) + C_R = copy(C') + dA = CuArray(parent(A)) |> TR + dB_L = CuArray(parent(B_L)) + dB_R = CuArray(parent(B_R)) + dC_L = CuArray(C_L) + dC_R = CuArray(C_R) + + D_L = mul!(C_L, t(A), B_L) + dD_L = mul!(dC_L, t(dA), dB_L) + + D_R = mul!(C_R, B_R, t(A)) + dD_R = mul!(dC_R, dB_R, t(dA)) + + @test C_L ≈ Array(dC_L) + @test D_L ≈ Array(dD_L) + @test C_R ≈ Array(dC_R) + @test D_R ≈ Array(dD_R) + end + end + + @testset "triangular-triangular mul!" 
begin + A = triu(rand(elty, m, m)) + B = triu(rand(elty, m, m)) + C0 = zeros(elty,m,m) + + sA = rand(elty,m,m) + sA = sA + transpose(sA) + sB = rand(elty,m,m) + sB = sB + transpose(sB) + + for (TRa, ta, TRb, tb, TRc, a_func, b_func) in ( + (UpperTriangular, identity, LowerTriangular, identity, Matrix, triu, tril), + (LowerTriangular, identity, UpperTriangular, identity, Matrix, tril, triu), + (UpperTriangular, identity, UpperTriangular, transpose, Matrix, triu, triu), + (UpperTriangular, transpose, UpperTriangular, identity, Matrix, triu, triu), + (LowerTriangular, identity, LowerTriangular, transpose, Matrix, tril, tril), + (LowerTriangular, transpose, LowerTriangular, identity, Matrix, tril, tril), + ) + + A = copy(sA) |> TRa + B = copy(sB) |> TRb + C = copy(C0) |> TRc + dA = CuArray(a_func(parent(sA))) |> TRa + dB = CuArray(b_func(parent(sB))) |> TRb + dC = if TRc == Matrix + CuArray(C0) |> DenseCuMatrix + else + CuArray(C0) |> TRc + end + + D = mul!(C, ta(A), tb(B)) + dD = mul!(dC, ta(dA), tb(dB)) + + @test C ≈ Array(dC) + @test D ≈ Array(dD) + end + end @testset "trsm" begin # compute @testset "adjtype=$adjtype, uplotype=$uplotype" for @@ -310,34 +407,6 @@ k = 13 h_C = triu(C) @test C ≈ h_C end - if elty <: Complex - @testset "herk!" begin - alpha = rand(elty) - beta = rand(elty) - A = rand(elty,m,m) - hA = A + A' - d_A = CuArray(A) - d_C = CuArray(hA) - CUBLAS.herk!('U','N',real(alpha),d_A,real(beta),d_C) - C = real(alpha)*(A*A') + real(beta)*hA - C = triu(C) - # move to host and compare - h_C = Array(d_C) - h_C = triu(C) - @test C ≈ h_C - end - @testset "herk" begin - A = rand(elty,m,m) - d_A = CuArray(A) - d_C = CUBLAS.herk('U','N',d_A) - C = A*A' - C = triu(C) - # move to host and compare - h_C = Array(d_C) - h_C = triu(C) - @test C ≈ h_C - end - end @testset "syr2k!" 
begin alpha = rand(elty) beta = rand(elty) @@ -375,8 +444,40 @@ k = 13 h_C = Array(d_C) h_C = triu(h_C) @test C ≈ h_C + C = (A*transpose(B) + B*transpose(A)) + d_C = CUBLAS.syr2k('U','N',d_A,d_B) + C = triu(C) + h_C = Array(d_C) + h_C = triu(h_C) + @test C ≈ h_C end if elty <: Complex + @testset "herk!" begin + alpha = rand(real(elty)) + beta = rand(real(elty)) + A = rand(elty,m,m) + hA = A + A' + d_A = CuArray(A) + d_C = CuArray(hA) + CUBLAS.herk!('U','N',alpha,d_A,beta,d_C) + C = real(alpha)*(A*A') + real(beta)*hA + C = triu(C) + # move to host and compare + h_C = Array(d_C) + h_C = triu(C) + @test C ≈ h_C + end + @testset "herk" begin + A = rand(elty,m,m) + d_A = CuArray(A) + d_C = CUBLAS.herk('U','N',d_A) + C = A*A' + C = triu(C) + # move to host and compare + h_C = Array(d_C) + h_C = triu(C) + @test C ≈ h_C + end @testset "her2k!" begin elty1 = elty elty2 = real(elty) @@ -403,11 +504,19 @@ k = 13 @test_throws DimensionMismatch CUBLAS.her2k!('U','N',α,d_A,d_Bbad,β,d_C) end @testset "her2k" begin + α = rand(elty) A = rand(elty,m,k) B = rand(elty,m,k) d_A = CuArray(A) d_B = CuArray(B) - C = A*B' + B*A' + C = (α*A*B' + conj(α)*B*A') + d_C = CUBLAS.her2k('U','N',α,d_A,d_B) + # move back to host and compare + C = triu(C) + h_C = Array(d_C) + h_C = triu(h_C) + @test C ≈ h_C + C = (A*B' + B*A') d_C = CUBLAS.her2k('U','N',d_A,d_B) # move back to host and compare C = triu(C) diff --git a/test/libraries/cublas/level3_gemm.jl b/test/libraries/cublas/level3/gemm.jl similarity index 82% rename from test/libraries/cublas/level3_gemm.jl rename to test/libraries/cublas/level3/gemm.jl index bdbe8d1db1..ab5e1c02e6 100644 --- a/test/libraries/cublas/level3_gemm.jl +++ b/test/libraries/cublas/level3/gemm.jl @@ -148,104 +148,6 @@ k = 13 @test C ≈ h_C @test_throws DimensionMismatch CUBLAS.symm('L','U',dsA,d_Bbad) end - @testset "trmm!" 
begin - alpha = rand(elty) - A = triu(rand(elty, m, m)) - B = rand(elty,m,n) - C = zeros(elty,m,n) - dA = CuArray(A) - dB = CuArray(B) - dC = CuArray(C) - C = alpha*A*B - CUBLAS.trmm!('L','U','N','N',alpha,dA,dB,dC) - # move to host and compare - h_C = Array(dC) - @test C ≈ h_C - end - @testset "trmm" begin - alpha = rand(elty) - A = triu(rand(elty, m, m)) - B = rand(elty,m,n) - C = zeros(elty,m,n) - dA = CuArray(A) - dB = CuArray(B) - dC = CuArray(C) - C = alpha*A*B - d_C = CUBLAS.trmm('L','U','N','N',alpha,dA,dB) - # move to host and compare - h_C = Array(d_C) - @test C ≈ h_C - end - @testset "triangular-dense mul!" begin - A = triu(rand(elty, m, m)) - B = rand(elty,m,n) - C = zeros(elty,m,n) - - sA = rand(elty,m,m) - sA = sA + transpose(sA) - - for t in (identity, transpose, adjoint), TR in (UpperTriangular, LowerTriangular, UnitUpperTriangular, UnitLowerTriangular) - A = copy(sA) |> TR - B_L = copy(B) - B_R = copy(B') - C_L = copy(C) - C_R = copy(C') - dA = CuArray(parent(A)) |> TR - dB_L = CuArray(parent(B_L)) - dB_R = CuArray(parent(B_R)) - dC_L = CuArray(C_L) - dC_R = CuArray(C_R) - - D_L = mul!(C_L, t(A), B_L) - dD_L = mul!(dC_L, t(dA), dB_L) - - D_R = mul!(C_R, B_R, t(A)) - dD_R = mul!(dC_R, dB_R, t(dA)) - - @test C_L ≈ Array(dC_L) - @test D_L ≈ Array(dD_L) - @test C_R ≈ Array(dC_R) - @test D_R ≈ Array(dD_R) - end - end - - @testset "triangular-triangular mul!" 
begin - A = triu(rand(elty, m, m)) - B = triu(rand(elty, m, m)) - C0 = zeros(elty,m,m) - - sA = rand(elty,m,m) - sA = sA + transpose(sA) - sB = rand(elty,m,m) - sB = sB + transpose(sB) - - for (TRa, ta, TRb, tb, TRc) in ( - (UpperTriangular, identity, LowerTriangular, identity, Matrix), - (LowerTriangular, identity, UpperTriangular, identity, Matrix), - (UpperTriangular, identity, UpperTriangular, transpose, Matrix), - (UpperTriangular, transpose, UpperTriangular, identity, Matrix), - (LowerTriangular, identity, LowerTriangular, transpose, Matrix), - (LowerTriangular, transpose, LowerTriangular, identity, Matrix), - ) - - A = copy(sA) |> TRa - B = copy(sB) |> TRb - C = copy(C0) |> TRc - dA = CuArray(parent(sA)) |> TRa - dB = CuArray(parent(sB)) |> TRb - dC = if TRc == Matrix - CuArray(C0) |> DenseCuMatrix - else - CuArray(C0) |> TRc - end - - D = mul!(C, ta(A), tb(B)) - dD = mul!(dC, ta(dA), tb(dB)) - - @test C ≈ Array(dC) - @test D ≈ Array(dD) - end - end if elty <: Complex @testset "hemm!" begin diff --git a/test/runtests.jl b/test/runtests.jl index 2be0872c07..9172d18a5c 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -372,8 +372,7 @@ try # tests that muck with the context should not be timed with CUDA events, # since they won't be valid at the end of the test anymore. time_source = in(test, ["core/initialization", - "base/examples", - "base/exceptions"]) ? :julia : :cuda + "core/cudadrv"]) ? :julia : :cuda # run the test running_tests[test] = now() @@ -397,6 +396,12 @@ try else print_testworker_stats(test, wrkr, resp) end + + # resetting the context breaks certain CUDA libraries, + # so spawn a new worker when the test did so + if test in ["core/initialization", "core/cudadrv"] + p = recycle_worker(p) + end end if p !== nothing