Use a default, global context for the first device.
maleadt committed Jul 12, 2017
1 parent 47fe8d3 commit ff5aa9f
Showing 12 changed files with 28 additions and 53 deletions.
17 changes: 12 additions & 5 deletions docs/src/man/usage.md
@@ -24,10 +24,6 @@ Using the `@cuda` macro, you can launch the kernel on a GPU of your choice:
using CUDAdrv, CUDAnative
using Base.Test

# CUDAdrv functionality: select device, create context
dev = CuDevice(0)
ctx = CuContext(dev)

# CUDAdrv functionality: generate and upload data
a = round.(rand(Float32, (3, 4)) * 100)
b = round.(rand(Float32, (3, 4)) * 100)
Expand All @@ -44,8 +40,19 @@ d_c = similar(d_a) # output array
c = Array(d_c)

@test a+b ≈ c
```

This code executes in a default, global context for the first device in your system. The
compiler queries the active context through `CuCurrentContext`, which means you can easily
switch contexts (to use a different device, or to supply different flags) by activating a
different one:

destroy(ctx)
```julia
dev = CuDevice(0)
CuContext(dev) do ctx
# allocate things in this context
@cuda ...
end
```
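The paragraph above mentions supplying different flags when activating a context, which the example does not show. A minimal sketch of what that could look like, assuming CUDAdrv exposes a scheduling flag named `SCHED_BLOCKING_SYNC` (the exact constant name is an assumption, not taken from this commit):

```julia
using CUDAdrv, CUDAnative

dev = CuDevice(0)
# assumption: SCHED_BLOCKING_SYNC is CUDAdrv's name for the driver's
# CU_CTX_SCHED_BLOCKING_SYNC flag; check the CUDAdrv docs for the exact constant
CuContext(dev, CUDAdrv.SCHED_BLOCKING_SYNC) do ctx
    # while this do-block runs, ctx is the current context,
    # so @cuda launches target it instead of the global default
    d_a = CuArray(rand(Float32, 3, 4))
    # ... launch kernels against d_a here
end
```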


6 changes: 0 additions & 6 deletions examples/hello_world.jl
@@ -5,11 +5,5 @@ function hello_world()
return
end

dev = CuDevice(0)
ctx = CuContext(dev)

@cuda (2,2) hello_world()

synchronize()

destroy!(ctx)
6 changes: 0 additions & 6 deletions examples/oob.jl
@@ -8,9 +8,6 @@

using CUDAdrv, CUDAnative

dev = CuDevice(0)
ctx = CuContext(dev)

a = CuArray{Float32}(10)

function memset(a, val)
@@ -20,7 +17,4 @@ function memset(a, val)
end

@cuda (1,11) memset(a, 0f0)

synchronize()

destroy!(ctx)
5 changes: 0 additions & 5 deletions examples/pairwise.jl
@@ -108,15 +108,10 @@ function pairwise_dist_gpu(lat::Vector{Float32}, lon::Vector{Float32})
end


const dev = CuDevice(0)
const ctx = CuContext(dev)

# generate reasonable data
const n = 10000
const lat = rand(Float32, n) .* 45
const lon = rand(Float32, n) .* -120

using Base.Test
@test pairwise_dist_cpu(lat, lon) ≈ pairwise_dist_gpu(lat, lon)

destroy!(ctx)
5 changes: 2 additions & 3 deletions examples/reduce/benchmark.jl
@@ -4,7 +4,8 @@
using BenchmarkTools
include("reduce.jl")

dev = CuDevice(0)
ctx = CuCurrentContext()
dev = device(ctx)
@assert(capability(dev) >= v"3.0", "this example requires a newer GPU")

len = 10^7
@@ -21,7 +22,6 @@ open(joinpath(@__DIR__, "reduce.jl.ptx"), "w") do f
cap=v"6.1.0")
end

ctx = CuContext(dev)
benchmark_gpu = @benchmarkable begin
gpu_reduce(+, gpu_input, gpu_output)
val = Array(gpu_output)[1]
Expand All @@ -35,7 +35,6 @@ benchmark_gpu = @benchmarkable begin
gc()
)
println(run(benchmark_gpu))
destroy!(ctx)


## CUDA
5 changes: 2 additions & 3 deletions examples/reduce/verify.jl
@@ -1,7 +1,8 @@
using Base.Test
include("reduce.jl")

dev = CuDevice(0)
ctx = CuCurrentContext()
dev = device(ctx)
if capability(dev) < v"3.0"
warn("this example requires a newer GPU")
exit(0)
@@ -15,11 +16,9 @@ cpu_val = reduce(+, input)

# CUDAnative
let
ctx = CuContext(dev)
gpu_input = CuArray(input)
gpu_output = similar(gpu_input)
gpu_reduce(+, gpu_input, gpu_output)
gpu_val = Array(gpu_output)[1]
destroy!(ctx)
@assert cpu_val == gpu_val
end
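Both reduce scripts now derive the device from the global context instead of creating their own. A minimal standalone sketch of the migration pattern, using only calls that appear in this commit (`CuCurrentContext`, `device`, `capability`):

```julia
using CUDAdrv, CUDAnative

ctx = CuCurrentContext()   # the default context CUDAnative sets up at load time
dev = device(ctx)          # the device backing that context
if capability(dev) < v"3.0"
    warn("this example requires a newer GPU")
end
```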
5 changes: 0 additions & 5 deletions examples/scan.jl
@@ -58,9 +58,6 @@ function gpu_accumulate!(op::Function, data::CuDeviceMatrix{T}) where {T}
return
end

dev = CuDevice(0)
ctx = CuContext(dev)

rows = 5
cols = 4

@@ -75,8 +72,6 @@ gpu_a = CuArray(a)
using Base.Test
@test cpu_a ≈ Array(gpu_a)

destroy!(ctx)


# FURTHER IMPROVEMENTS:
# - work efficiency
5 changes: 0 additions & 5 deletions examples/vadd.jl
@@ -8,9 +8,6 @@ function kernel_vadd(a, b, c)
return nothing
end

dev = CuDevice(0)
ctx = CuContext(dev)

dims = (3,4)
a = round.(rand(Float32, dims) * 100)
b = round.(rand(Float32, dims) * 100)
@@ -23,5 +20,3 @@ len = prod(dims)
@cuda (1,len) kernel_vadd(d_a, d_b, d_c)
c = Array(d_c)
@test a+b ≈ c

destroy!(ctx)
12 changes: 12 additions & 0 deletions src/CUDAnative.jl
@@ -25,6 +25,8 @@ include(joinpath("device", "libdevice.jl")) # so should get loaded late (JuliaL
include("execution.jl")
include("reflection.jl")

const default_device = Ref{CuDevice}()
const default_context = Ref{CuContext}()
function __init__()
if !configured
warn("CUDAnative.jl has not been configured, and will not work properly.")
@@ -37,6 +39,16 @@ function __init__()
VersionNumber(Base.libllvm_version) != julia_llvm_version
error("Your set-up has changed. Please run Pkg.build(\"CUDAnative\") and restart Julia.")
end

# instantiate a default device and context;
# this will be implicitly used through `CuCurrentContext`
# NOTE: although these conceptually match what the primary context is for,
# we don't use that because it is refcounted separately
# and might confuse / be confused by user operations
# (eg. calling `unsafe_reset!` on a primary context)
default_device[] = CuDevice(0)
default_context[] = CuContext(default_device[])

init_jit()
end
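For contrast with the NOTE above, the rejected alternative would have leaned on the driver's refcounted primary context. A hypothetical sketch; the `CuPrimaryContext` constructor and `CuContext(pctx)` call are assumptions inferred from the `unsafe_reset!` reference in the comment, not code from this commit:

```julia
# hypothetical primary-context variant, NOT what __init__ does:
pctx = CuPrimaryContext(default_device[])
default_context[] = CuContext(pctx)  # bumps the driver-side refcount
# if user code later calls unsafe_reset!(pctx), the driver tears down the
# context underneath this global reference, which is exactly the confusion
# the plain CuContext approach avoids
```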

5 changes: 0 additions & 5 deletions test/perf/launch_overhead/cuda.jl
@@ -10,9 +10,6 @@ const ITERATIONS = 5000
# TODO: api-trace shows some attribute fetches, where do they come from?

function main()
dev = CuDevice(0)
ctx = CuContext(dev)

mod = CuModuleFile("cuda.ptx")
fun = CuFunction(mod, "kernel_dummy")

@@ -39,8 +36,6 @@

@printf("CPU time: %.2fus\n", median(cpu_time))
@printf("GPU time: %.2fus\n", median(gpu_time))

destroy!(ctx)
end

main()
5 changes: 0 additions & 5 deletions test/perf/launch_overhead/cudanative.jl
@@ -13,9 +13,6 @@ const len = 1000
const ITERATIONS = 5000

function main()
dev = CuDevice(0)
ctx = CuContext(dev)

cpu_time = Vector{Float64}(ITERATIONS)
gpu_time = Vector{Float64}(ITERATIONS)

@@ -38,8 +35,6 @@

@printf("CPU time: %.2fus\n", median(cpu_time))
@printf("GPU time: %.2fus\n", median(gpu_time))

destroy!(ctx)
end

main()
5 changes: 0 additions & 5 deletions test/perf/launch_overhead/cudanative_profile.jl
@@ -14,9 +14,6 @@ const len = 1000
const ITERATIONS = 5000

function main()
dev = CuDevice(0)
ctx = CuContext(dev)

cpu_time = Vector{Float64}(ITERATIONS)

gpu_arr = CuArray{Float32}(len)
@@ -35,8 +32,6 @@

@printf("CPU time: %.2fus\n", median(cpu_time))
CUDAnative.Profile.print()

destroy!(ctx)
end

main()
