Better support for unified and host memory #2138

Merged: 19 commits, Nov 1, 2023

.buildkite/pipeline.yml: 37 additions & 20 deletions

@@ -19,8 +19,7 @@ steps:
           cuda: "*"
         commands: |
           echo -e "[CUDA_Runtime_jll]\nlocal = \"true\"" >LocalPreferences.toml
-        if: build.message !~ /\[skip tests\]/ &&
-            build.message !~ /\[skip julia\]/
+        if: build.message !~ /\[skip tests\]/
         timeout_in_minutes: 120
         matrix:
           setup:

Expand All @@ -44,7 +43,7 @@ steps:
- JuliaCI/julia#v1:
version: 1.9
- JuliaCI/julia-test#v1:
test_args: "core base libraries"
test_args: "--quickfail core base libraries"
- JuliaCI/julia-coverage#v1:
dirs:
- src
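The `--quickfail` flag added above is forwarded to CUDA.jl's test harness and makes the suite abort at the first failure, keeping CI feedback fast. The same selection can be run locally with standard Pkg machinery; a minimal sketch, with the flag and suite names taken from the line above:

```julia
using Pkg

# Run only the "core", "base" and "libraries" suites, stopping at the
# first failure, mirroring the Buildkite job configured above.
Pkg.test("CUDA"; test_args=["--quickfail", "core", "base", "libraries"])
```
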
@@ -53,9 +52,7 @@
         agents:
           queue: "juliagpu"
           cuda: "*"
-        if: build.message !~ /\[skip tests\]/ &&
-            build.message !~ /\[skip cuda\]/ &&
-            !build.pull_request.draft
+        if: build.message !~ /\[skip tests\]/ && !build.pull_request.draft
         timeout_in_minutes: 120
         matrix:
           setup:

@@ -73,6 +70,34 @@ steps:
           echo -e "[CUDA_Runtime_jll]\nversion = \"{{matrix.cuda}}\"" >LocalPreferences.toml
           echo -e "[CUDA_Driver_jll]\ncompat = \"false\"" >>LocalPreferences.toml
 
+  - group: "Memory"
+    key: "memory"
+    depends_on: "julia"
+    steps:
+      - label: "CuArray with {{matrix.memory}} memory"
+        plugins:
+          - JuliaCI/julia#v1:
+              version: 1.9
+          - JuliaCI/julia-test#v1:
+              test_args: "--quickfail core base libraries"
+          - JuliaCI/julia-coverage#v1:
+              dirs:
+                - src
+                - lib
+                - examples
+        agents:
+          queue: "juliagpu"
+          cuda: "*"
+        if: build.message !~ /\[skip tests\]/ && !build.pull_request.draft
+        timeout_in_minutes: 120
+        matrix:
+          setup:
+            memory:
+              - "unified"
+              - "host"
+        commands: |
+          echo -e "[CUDA]\ndefault_memory = \"{{matrix.memory}}\"" >LocalPreferences.toml
+
   - group: ":nesting_dolls: Subpackages"
     depends_on: "cuda"
     steps:

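The new "Memory" group reruns the core suites with the `default_memory` preference pinned to `"unified"` or `"host"`, so every `CuArray` allocated without an explicit memory type exercises that kind of memory. As a rough sketch of what the preference changes for user code; the `unified` keyword and the buffer-type names are assumptions about the API this PR targets, not something shown in this diff:

```julia
using CUDA

# With default_memory = "unified" active, a plain allocation should be
# backed by unified (managed) memory instead of device memory:
a = CuArray{Float32}(undef, 1024)

# A memory type can also be requested explicitly, independent of the
# preference (buffer-type parameter name assumed):
b = CuArray{Float32,1,CUDA.Mem.UnifiedBuffer}(undef, 1024)

# ... or through a keyword to `cu` (keyword assumed):
c = cu(rand(Float32, 1024); unified=true)
```
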
@@ -104,9 +129,7 @@ steps:
         agents:
           queue: "juliagpu"
           cuda: "*"
-        if: build.message !~ /\[skip tests\]/ &&
-            build.message !~ /\[skip subpackages\]/ &&
-            !build.pull_request.draft
+        if: build.message !~ /\[skip tests\]/ && !build.pull_request.draft
         timeout_in_minutes: 120
         commands: |
           julia --project -e '

@@ -165,9 +188,7 @@ steps:
         agents:
           queue: "juliagpu"
           cuda: "*"
-        if: build.message !~ /\[skip tests\]/ &&
-            build.message !~ /\[skip downstream\]/ &&
-            !build.pull_request.draft
+        if: build.message !~ /\[skip tests\]/ && !build.pull_request.draft
         timeout_in_minutes: 60
         soft_fail:
           - exit_status: 3

@@ -240,9 +261,7 @@ steps:
           cuda: "*"
         env:
           JULIA_CUDA_USE_COMPAT: 'false' # NVIDIA bug #3418723: injection tools prevent probing libcuda
-        if: build.message !~ /\[skip tests\]/ &&
-            build.message !~ /\[skip sanitizer\]/ &&
-            !build.pull_request.draft
+        if: build.message !~ /\[skip tests\]/ && !build.pull_request.draft
         timeout_in_minutes: 10
 
   # we want to benchmark every commit on the master branch, even if it failed CI

@@ -274,9 +293,8 @@ steps:
         agents:
           queue: "juliagpu"
           cuda: "*"
-        if: build.message !~ /\[skip benchmarks\]/ &&
-            build.branch !~ /^master$$/ &&
-            !build.pull_request.draft
+        if: build.message !~ /\[skip benchmarks\]/ && !build.pull_request.draft &&
+            build.branch !~ /^master$$/
         timeout_in_minutes: 30
 
   # if we will submit results, use the benchmark queue so that we will

@@ -310,8 +328,7 @@ steps:
           queue: "benchmark"
           gpu: "rtx2070"
           cuda: "*"
-        if: build.message !~ /\[skip benchmarks\]/ &&
-            build.branch =~ /^master$$/
+        if: build.message !~ /\[skip benchmarks\]/ && build.branch =~ /^master$$/
         matrix:
           setup:
             julia:

LocalPreferences.toml: 4 additions & 0 deletions

@@ -12,6 +12,10 @@
 # making it possible to use cooperative multitasking.
 #nonblocking_synchronization = true
 
+# which memory type unspecified allocations should default to.
+# possible values: "device", "unified", "host"
+#default_memory = "device"
+
 [CUDA_Driver_jll]
 # whether to attempt to load a forwards-compatible userspace driver.
 # only turn this off if you experience issues, e.g., when using a local

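Instead of editing LocalPreferences.toml by hand, the preference can also be set through Preferences.jl; a minimal sketch, where the key `default_memory` comes from this diff and the rest is standard Preferences usage:

```julia
using CUDA, Preferences

# Writes `default_memory = "unified"` under [CUDA] in the active
# project's LocalPreferences.toml; takes effect in the next Julia session.
set_preferences!(CUDA, "default_memory" => "unified"; force=true)
```
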
lib/cusparse/array.jl: 7 additions & 7 deletions

@@ -417,9 +417,9 @@ Adapt.adapt_storage(::Type{CuArray}, xs::SparseMatrixCSC) = CuSparseMatrixCSC(xs)
 Adapt.adapt_storage(::Type{CuArray{T}}, xs::SparseVector) where {T} = CuSparseVector{T}(xs)
 Adapt.adapt_storage(::Type{CuArray{T}}, xs::SparseMatrixCSC) where {T} = CuSparseMatrixCSC{T}(xs)
 
-Adapt.adapt_storage(::CUDA.CuArrayAdaptor, xs::AbstractSparseArray) =
+Adapt.adapt_storage(::CUDA.CuArrayKernelAdaptor, xs::AbstractSparseArray) =
     adapt(CuArray, xs)
-Adapt.adapt_storage(::CUDA.CuArrayAdaptor, xs::AbstractSparseArray{<:AbstractFloat}) =
+Adapt.adapt_storage(::CUDA.CuArrayKernelAdaptor, xs::AbstractSparseArray{<:AbstractFloat}) =
     adapt(CuArray{Float32}, xs)
 
 Adapt.adapt_storage(::Type{Array}, xs::CuSparseVector) = SparseVector(xs)

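`CuArrayKernelAdaptor` backs the `cu` conversion function, so these methods are what let `cu` upload host sparse arrays, including the Float32 demotion it applies to floating-point data. A small usage sketch, with the behavior inferred from the methods above:

```julia
using CUDA, CUDA.CUSPARSE, SparseArrays

A = sprand(Float64, 100, 100, 0.05)

# The AbstractSparseArray{<:AbstractFloat} method demotes to Float32,
# mirroring what `cu` does for dense arrays:
dA = cu(A)  # expected to be a CuSparseMatrixCSC{Float32}
```
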
@@ -546,15 +546,15 @@ end
 
 # interop with device arrays
 
-function Adapt.adapt_structure(to::CUDA.Adaptor, x::CuSparseVector)
+function Adapt.adapt_structure(to::CUDA.KernelAdaptor, x::CuSparseVector)
     return CuSparseDeviceVector(
         adapt(to, x.iPtr),
         adapt(to, x.nzVal),
         length(x), x.nnz
     )
 end
 
-function Adapt.adapt_structure(to::CUDA.Adaptor, x::CuSparseMatrixCSR)
+function Adapt.adapt_structure(to::CUDA.KernelAdaptor, x::CuSparseMatrixCSR)
     return CuSparseDeviceMatrixCSR(
         adapt(to, x.rowPtr),
         adapt(to, x.colVal),

@@ -563,7 +563,7 @@ function Adapt.adapt_structure(to::CUDA.Adaptor, x::CuSparseMatrixCSR)
     )
 end
 
-function Adapt.adapt_structure(to::CUDA.Adaptor, x::CuSparseMatrixCSC)
+function Adapt.adapt_structure(to::CUDA.KernelAdaptor, x::CuSparseMatrixCSC)
     return CuSparseDeviceMatrixCSC(
         adapt(to, x.colPtr),
         adapt(to, x.rowVal),

@@ -572,7 +572,7 @@ function Adapt.adapt_structure(to::CUDA.Adaptor, x::CuSparseMatrixCSC)
     )
 end
 
-function Adapt.adapt_structure(to::CUDA.Adaptor, x::CuSparseMatrixBSR)
+function Adapt.adapt_structure(to::CUDA.KernelAdaptor, x::CuSparseMatrixBSR)
     return CuSparseDeviceMatrixBSR(
         adapt(to, x.rowPtr),
         adapt(to, x.colVal),

@@ -582,7 +582,7 @@ function Adapt.adapt_structure(to::CUDA.Adaptor, x::CuSparseMatrixBSR)
     )
 end
 
-function Adapt.adapt_structure(to::CUDA.Adaptor, x::CuSparseMatrixCOO)
+function Adapt.adapt_structure(to::CUDA.KernelAdaptor, x::CuSparseMatrixCOO)
    return CuSparseDeviceMatrixCOO(
        adapt(to, x.rowInd),
        adapt(to, x.colInd),

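`KernelAdaptor` (renamed from `Adaptor`) is what `cudaconvert` applies to each argument at `@cuda` launch time, so these methods are what turn a host-side sparse handle into its device counterpart inside a kernel. A minimal sketch of that round trip; the kernel is illustrative, and the device-side field names are assumed to mirror the host type:

```julia
using CUDA, CUDA.CUSPARSE, SparseArrays

dA = CuSparseMatrixCSR(sprand(Float32, 8, 8, 0.25))
out = CUDA.zeros(Float32, 1)

# Inside the kernel, A arrives as a CuSparseDeviceMatrixCSR whose fields
# (rowPtr, colVal, nzVal) are device arrays, courtesy of adapt_structure.
function sum_nonzeros(A, out)
    acc = 0f0
    for i in 1:length(A.nzVal)
        @inbounds acc += A.nzVal[i]
    end
    out[1] = acc
    return
end

@cuda threads=1 sum_nonzeros(dA, out)
```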