Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update sparse factorizations #2152

Merged
merged 3 commits into from
Nov 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion lib/cusolver/CUSOLVER.jl
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ using ..CUSPARSE: cusparseMatDescr_t
using CEnum: @cenum

using LinearAlgebra
using LinearAlgebra: BlasFloat
using LinearAlgebra: BlasFloat, Factorization

export has_cusolvermg

Expand Down
132 changes: 80 additions & 52 deletions lib/cusolver/sparse_factorizations.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ end

Base.unsafe_convert(::Type{csrqrInfo_t}, info::SparseQRInfo) = info.info

mutable struct SparseQR{T <: BlasFloat}
mutable struct SparseQR{T <: BlasFloat} <: Factorization{T}
n::Cint
m::Cint
nnzA::Cint
Expand All @@ -34,6 +34,7 @@ function SparseQR(A::CuSparseMatrixCSR{T,Cint}, index::Char='O') where T <: Blas
buffer = CU_NULL
F = SparseQR{T}(n, m, nnzA, mu, handle, descA, info, buffer)
spqr_analyse(F, A)
spqr_buffer(F, A)
return F
end

Expand All @@ -53,35 +54,11 @@ function spqr_analyse(F::SparseQR{T}, A::CuSparseMatrixCSR{T,Cint}) where T <: B
return F
end

#csrqrSetup
for (fname, elty) in ((:cusolverSpScsrqrSetup, :Float32),
(:cusolverSpDcsrqrSetup, :Float64),
(:cusolverSpCcsrqrSetup, :ComplexF32),
(:cusolverSpZcsrqrSetup, :ComplexF64))
@eval begin
# cusolverStatus_t cusolverSpScsrqrSetup(
# cusolverSpHandle_t handle,
# int m,
# int n,
# int nnzA,
# const cusparseMatDescr_t descrA,
# const float * csrValA,
# const int * csrRowPtrA,
# const int * csrColIndA,
# float mu,
# csrqrInfo_t info);
function spqr_setup(F::SparseQR{$elty}, A::CuSparseMatrixCSR{$elty,Cint})
$fname(F.handle, F.m, F.n, F.nnzA, F.descA, A.nzVal, A.rowPtr, A.colVal, F.mu, F.info)
return F
end
end
end

for (bname, fname, sname, pname, elty, relty) in
((:cusolverSpScsrqrBufferInfo, :cusolverSpScsrqrFactor, :cusolverSpScsrqrSolve, :cusolverSpScsrqrZeroPivot, :Float32 , :Float32),
(:cusolverSpDcsrqrBufferInfo, :cusolverSpDcsrqrFactor, :cusolverSpDcsrqrSolve, :cusolverSpDcsrqrZeroPivot, :Float64 , :Float64),
(:cusolverSpCcsrqrBufferInfo, :cusolverSpCcsrqrFactor, :cusolverSpCcsrqrSolve, :cusolverSpCcsrqrZeroPivot, :ComplexF32, :Float32),
(:cusolverSpZcsrqrBufferInfo, :cusolverSpZcsrqrFactor, :cusolverSpZcsrqrSolve, :cusolverSpZcsrqrZeroPivot, :ComplexF64, :Float64))
for (bname, iname, fname, sname, pname, elty, relty) in
((:cusolverSpScsrqrBufferInfo, :cusolverSpScsrqrSetup, :cusolverSpScsrqrFactor, :cusolverSpScsrqrSolve, :cusolverSpScsrqrZeroPivot, :Float32 , :Float32),
(:cusolverSpDcsrqrBufferInfo, :cusolverSpDcsrqrSetup, :cusolverSpDcsrqrFactor, :cusolverSpDcsrqrSolve, :cusolverSpDcsrqrZeroPivot, :Float64 , :Float64),
(:cusolverSpCcsrqrBufferInfo, :cusolverSpCcsrqrSetup, :cusolverSpCcsrqrFactor, :cusolverSpCcsrqrSolve, :cusolverSpCcsrqrZeroPivot, :ComplexF32, :Float32),
(:cusolverSpZcsrqrBufferInfo, :cusolverSpZcsrqrSetup, :cusolverSpZcsrqrFactor, :cusolverSpZcsrqrSolve, :cusolverSpZcsrqrZeroPivot, :ComplexF64, :Float64))
@eval begin
# csrqrBufferInfo
#
Expand All @@ -101,20 +78,33 @@ for (bname, fname, sname, pname, elty, relty) in
internalDataInBytes = Ref{Csize_t}(0)
workspaceInBytes = Ref{Csize_t}(0)
$bname(F.handle, F.m, F.n, F.nnzA, F.descA, A.nzVal, A.rowPtr, A.colVal, F.info, internalDataInBytes, workspaceInBytes)
# TODO: allocate buffer?
F.buffer = CuVector{UInt8}(undef, workspaceInBytes[])
return F
end

# csrqrSetup
#
# cusolverStatus_t cusolverSpDcsrqrSetup(
# cusolverSpHandle_t handle,
# int m,
# int n,
# int nnzA,
# const cusparseMatDescr_t descrA,
# const double * csrValA,
# const int * csrRowPtrA,
# const int * csrColIndA,
# double mu,
# csrqrInfo_t info);
#
# csrqrFactor
#
# cusolverStatus_t cusolverSpScsrqrFactor(
# cusolverStatus_t cusolverSpDcsrqrFactor(
# cusolverSpHandle_t handle,
# int m,
# int n,
# int nnzA,
# float * b,
# float * x,
# double * b,
# double * x,
# csrqrInfo_t info,
# void * pBuffer);
#
Expand All @@ -125,15 +115,17 @@ for (bname, fname, sname, pname, elty, relty) in
# csrqrInfo_t info,
# double tol,
# int * position);
function spqr_factorise(F::SparseQR{$elty}, tol::$relty)
function spqr_factorise(F::SparseQR{$elty}, A::CuSparseMatrixCSR{$elty,Cint}, tol::$relty)
$iname(F.handle, F.m, F.n, F.nnzA, F.descA, A.nzVal, A.rowPtr, A.colVal, F.mu, F.info)
$fname(F.handle, F.m, F.n, F.nnzA, CU_NULL, CU_NULL, F.info, F.buffer)
singularity = Ref{Cint}(0)
$pname(F.handle, F.info, tol, singularity)
(singularity[] ≥ 0) && throw(SingularException(singularity[]))
return F
end

function spqr_factorise_solve(F::SparseQR{$elty}, b::CuVecOrMat{$elty}, x::CuVecOrMat{$elty}, tol::$relty)
function spqr_factorise_solve(F::SparseQR{$elty}, A::CuSparseMatrixCSR{$elty,Cint}, b::CuVector{$elty}, x::CuVector{$elty}, tol::$relty)
$iname(F.handle, F.m, F.n, F.nnzA, F.descA, A.nzVal, A.rowPtr, A.colVal, F.mu, F.info)
$fname(F.handle, F.m, F.n, F.nnzA, b, x, F.info, F.buffer)
singularity = Ref{Cint}(0)
$pname(F.handle, F.info, tol, singularity)
Expand All @@ -151,10 +143,18 @@ for (bname, fname, sname, pname, elty, relty) in
# float * x,
# csrqrInfo_t info,
# void * pBuffer);
function spqr_solve(F::SparseQR{$elty}, b::CuVecOrMat{$elty}, x::CuVecOrMat{$elty})
function spqr_solve(F::SparseQR{$elty}, b::CuVector{$elty}, x::CuVector{$elty})
$sname(F.handle, F.m, F.n, b, x, F.info, F.buffer)
return x
end

function spqr_solve(F::SparseQR{$elty}, B::CuMatrix{$elty}, X::CuMatrix{$elty})
m, p = size(B)
for j=1:p
$sname(F.handle, F.m, F.n, view(B,:,j), view(X,:,j), F.info, F.buffer)
end
return X
end
end
end

Expand All @@ -172,7 +172,7 @@ end

Base.unsafe_convert(::Type{csrcholInfo_t}, info::SparseCholeskyInfo) = info.info

mutable struct SparseCholesky{T <: BlasFloat}
mutable struct SparseCholesky{T <: BlasFloat} <: Factorization{T}
n::Cint
nnzA::Cint
handle::cusolverSpHandle_t
Expand All @@ -181,7 +181,7 @@ mutable struct SparseCholesky{T <: BlasFloat}
buffer::Union{CuPtr{Cvoid},CuVector{UInt8}}
end

function SparseCholesky(A::CuSparseMatrixCSR{T,Cint}, index::Char='O') where T <: BlasFloat
function SparseCholesky(A::Union{CuSparseMatrixCSC{T,Cint},CuSparseMatrixCSR{T,Cint}}, index::Char='O') where T <: BlasFloat
n = checksquare(A)
nnzA = nnz(A)
handle = sparse_handle()
Expand All @@ -190,6 +190,7 @@ function SparseCholesky(A::CuSparseMatrixCSR{T,Cint}, index::Char='O') where T <
buffer = CU_NULL
F = SparseCholesky{T}(n, nnzA, handle, descA, info, buffer)
spcholesky_analyse(F, A)
spcholesky_buffer(F, A)
return F
end

Expand All @@ -203,16 +204,20 @@ end
# const int * csrRowPtrA,
# const int * csrColIndA,
# csrcholInfo_t info);
function spcholesky_analyse(F::SparseCholesky{T}, A::CuSparseMatrixCSR{T}) where T <: BlasFloat
cusolverSpXcsrcholAnalysis(F.handle, F.n, F.nnzA, F.descA, A.rowPtr, A.colVal, F.info)
function spcholesky_analyse(F::SparseCholesky{T}, A::Union{CuSparseMatrixCSC{T,Cint},CuSparseMatrixCSR{T,Cint}}) where T <: BlasFloat
if A isa CuSparseMatrixCSC
cusolverSpXcsrcholAnalysis(F.handle, F.n, F.nnzA, F.descA, A.colPtr, A.rowVal, F.info)
else
cusolverSpXcsrcholAnalysis(F.handle, F.n, F.nnzA, F.descA, A.rowPtr, A.colVal, F.info)
end
return F
end

for (bname, fname, pname, sname, dname, elty, relty) in
((:cusolverSpScsrcholBufferInfo, :cusolverSpScsrcholFactor, :cusolverSpScsrcholZeroPivot, :cusolverSpScsrcholSolve, :cusolverSpScsrcholDiag, :Float32 , :Float32),
(:cusolverSpDcsrcholBufferInfo, :cusolverSpDcsrcholFactor, :cusolverSpDcsrcholZeroPivot, :cusolverSpDcsrcholSolve, :cusolverSpDcsrcholDiag, :Float64 , :Float64),
(:cusolverSpCcsrcholBufferInfo, :cusolverSpCcsrcholFactor, :cusolverSpCcsrcholZeroPivot, :cusolverSpCcsrcholSolve, :cusolverSpCcsrcholDiag, :ComplexF32, :Float32),
(:cusolverSpZcsrcholBufferInfo, :cusolverSpZcsrcholFactor, :cusolverSpZcsrcholZeroPivot, :cusolverSpZcsrcholSolve, :cusolverSpZcsrcholDiag, :ComplexF64, :Float64))
for (bname, fname, pname, elty, relty) in
((:cusolverSpScsrcholBufferInfo, :cusolverSpScsrcholFactor, :cusolverSpScsrcholZeroPivot, :Float32 , :Float32),
(:cusolverSpDcsrcholBufferInfo, :cusolverSpDcsrcholFactor, :cusolverSpDcsrcholZeroPivot, :Float64 , :Float64),
(:cusolverSpCcsrcholBufferInfo, :cusolverSpCcsrcholFactor, :cusolverSpCcsrcholZeroPivot, :ComplexF32, :Float32),
(:cusolverSpZcsrcholBufferInfo, :cusolverSpZcsrcholFactor, :cusolverSpZcsrcholZeroPivot, :ComplexF64, :Float64))
@eval begin
# csrcholBufferInfo
#
Expand All @@ -227,11 +232,14 @@ for (bname, fname, pname, sname, dname, elty, relty) in
# csrcholInfo_t info,
# size_t * internalDataInBytes,
# size_t * workspaceInBytes);
function spcholesky_buffer(F::SparseCholesky{$elty}, A::CuSparseMatrixCSR{$elty})
function spcholesky_buffer(F::SparseCholesky{$elty}, A::Union{CuSparseMatrixCSC{$elty,Cint},CuSparseMatrixCSR{$elty,Cint}})
internalDataInBytes = Ref{Csize_t}(0)
workspaceInBytes = Ref{Csize_t}(0)
$bname(F.handle, F.n, F.nnzA, F.descA, A.nzVal, A.rowPtr, A.colVal, F.info, internalDataInBytes, workspaceInBytes)
# TODO: allocate buffer?
if A isa CuSparseMatrixCSC
$bname(F.handle, F.n, F.nnzA, F.descA, A.nzVal, A.colPtr, A.rowVal, F.info, internalDataInBytes, workspaceInBytes)
else
$bname(F.handle, F.n, F.nnzA, F.descA, A.nzVal, A.rowPtr, A.colVal, F.info, internalDataInBytes, workspaceInBytes)
end
F.buffer = CuVector{UInt8}(undef, workspaceInBytes[])
return F
end
Expand All @@ -256,14 +264,26 @@ for (bname, fname, pname, sname, dname, elty, relty) in
# csrcholInfo_t info,
# float tol,
# int * position);
function spcholesky_factorise(F::SparseCholesky{$elty}, A::CuSparseMatrixCSR{$elty}, tol::$relty)
$fname(F.handle, F.n, F.nnzA, F.descA, A.nzVal, A.rowPtr, A.colVal, F.info, F.buffer)
function spcholesky_factorise(F::SparseCholesky{$elty}, A::Union{CuSparseMatrixCSC{$elty,Cint},CuSparseMatrixCSR{$elty,Cint}}, tol::$relty)
if A isa CuSparseMatrixCSC
nzval = $elty <: Complex ? conj(A.nzVal) : A.nzVal
$fname(F.handle, F.n, F.nnzA, F.descA, nzval, A.colPtr, A.rowVal, F.info, F.buffer)
else
$fname(F.handle, F.n, F.nnzA, F.descA, A.nzVal, A.rowPtr, A.colVal, F.info, F.buffer)
end
singularity = Ref{Cint}(0)
$pname(F.handle, F.info, tol, singularity)
(singularity[] ≥ 0) && throw(SingularException(singularity[]))
return F
end
end
end

for (sname, dname, elty, relty) in ((:cusolverSpScsrcholSolve, :cusolverSpScsrcholDiag, :Float32 , :Float32),
(:cusolverSpDcsrcholSolve, :cusolverSpDcsrcholDiag, :Float64 , :Float64),
(:cusolverSpCcsrcholSolve, :cusolverSpCcsrcholDiag, :ComplexF32, :Float32),
(:cusolverSpZcsrcholSolve, :cusolverSpZcsrcholDiag, :ComplexF64, :Float64))
@eval begin
# csrcholSolve
#
# cusolverStatus_t cusolverSpZcsrcholSolve(
Expand All @@ -273,11 +293,19 @@ for (bname, fname, pname, sname, dname, elty, relty) in
# cuDoubleComplex * x,
# csrcholInfo_t info,
# void * pBuffer);
function spcholesky_solve(F::SparseCholesky{$elty}, b::CuVecOrMat{$elty}, x::CuVecOrMat{$elty})
function spcholesky_solve(F::SparseCholesky{$elty}, b::CuVector{$elty}, x::CuVector{$elty})
$sname(F.handle, F.n, b, x, F.info, F.buffer)
return x
end

function spcholesky_solve(F::SparseCholesky{$elty}, B::CuMatrix{$elty}, X::CuMatrix{$elty})
n, p = size(B)
for j=1:p
$sname(F.handle, F.n, view(B,:,j), view(X,:,j), F.info, F.buffer)
end
return X
end

# csrcholDiag
#
# cusolverStatus_t cusolverSpCcsrcholDiag(
Expand Down
68 changes: 43 additions & 25 deletions test/libraries/cusolver/sparse_factorizations.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,29 +3,41 @@ using SparseArrays, LinearAlgebra

m = 60
n = 40
p = 5
density = 0.05

@testset "SparseCholesky -- $elty" for elty in [Float32, Float64, ComplexF32, ComplexF64]
R = real(elty)
A = sprand(elty, n, n, density)
A = A * A' + I
d_A = CuSparseMatrixCSR{elty}(A)
F = CUSOLVER.SparseCholesky(d_A)
CUSOLVER.spcholesky_buffer(F, d_A)
tol = R == Float32 ? R(1e-6) : R(1e-12)
CUSOLVER.spcholesky_factorise(F, d_A, tol)
b = rand(elty, n)
d_b = CuVector(b)
x = zeros(elty, n)
d_x = CuVector(x)
CUSOLVER.spcholesky_solve(F, d_b, d_x)
d_r = d_b - d_A * d_x
@test norm(d_A' * d_r) ≤ √eps(R)
diag = zeros(elty, n)
d_diag = CuVector{R}(diag)
CUSOLVER.spcholesky_diag(F, d_diag)
det_A = mapreduce(x -> x^2, *, d_diag)
@test det_A ≈ det(Matrix(A))
@testset "$SparseMatrixType" for SparseMatrixType in (CuSparseMatrixCSR, CuSparseMatrixCSC)
R = real(elty)
A = sprand(elty, n, n, density)
A = A * A' + I
d_A = SparseMatrixType{elty}(A)
F = CUSOLVER.SparseCholesky(d_A)
tol = R == Float32 ? R(1e-6) : R(1e-12)
CUSOLVER.spcholesky_factorise(F, d_A, tol)

b = rand(elty, n)
d_b = CuVector(b)
x = zeros(elty, n)
d_x = CuVector(x)
CUSOLVER.spcholesky_solve(F, d_b, d_x)
d_r = d_b - d_A * d_x
@test norm(d_r) ≤ √eps(R)

B = rand(elty, n, p)
d_B = CuMatrix(B)
X = zeros(elty, n, p)
d_X = CuMatrix(X)
CUSOLVER.spcholesky_solve(F, d_B, d_X)
d_R = d_B - d_A * d_X
@test norm(d_R) ≤ √eps(R)

diag = zeros(elty, n)
d_diag = CuVector{R}(diag)
CUSOLVER.spcholesky_diag(F, d_diag)
det_A = mapreduce(x -> x^2, *, d_diag)
@test det_A ≈ det(Matrix(A))
end
end

@testset "SparseQR -- $elty" for elty in [Float32, Float64, ComplexF32, ComplexF64]
Expand All @@ -34,10 +46,9 @@ end
A = A + sparse(I, m, n)
d_A = CuSparseMatrixCSR{elty}(A)
F = CUSOLVER.SparseQR(d_A)
CUSOLVER.spqr_setup(F, d_A)
CUSOLVER.spqr_buffer(F, d_A)
tol = R == Float32 ? R(1e-6) : R(1e-12)
CUSOLVER.spqr_factorise(F, tol)
CUSOLVER.spqr_factorise(F, d_A, tol)

b = rand(elty, m)
d_b = CuVector(b)
x = zeros(elty, n)
Expand All @@ -46,15 +57,22 @@ end
d_r = d_b - d_A * d_x
@test norm(d_A' * d_r) ≤ √eps(R)

B = rand(elty, m, p)
d_B = CuMatrix(B)
X = zeros(elty, n, p)
d_X = CuMatrix(X)
CUSOLVER.spqr_solve(F, copy(d_B), d_X)
d_R = d_B - d_A * d_X
@test norm(d_A' * d_R) ≤ √eps(R)

d_B = copy(d_A)
nnz_B = rand(elty, nnz(d_B))
d_B.nzVal = CuVector{elty}(nnz_B)
CUSOLVER.spqr_setup(F, d_B)
b = rand(elty, m)
d_b = CuVector(b)
x = zeros(elty, n)
d_x = CuVector(x)
CUSOLVER.spqr_factorise_solve(F, copy(d_b), d_x, tol)
CUSOLVER.spqr_factorise_solve(F, d_B, copy(d_b), d_x, tol)
d_r = d_b - d_B * d_x
@test norm(d_B' * d_r) ≤ √eps(R)
end