Fix-up some docstrings.
maleadt committed May 26, 2020
1 parent ddaf4cb commit 7c269ae
Showing 8 changed files with 55 additions and 54 deletions.
6 changes: 3 additions & 3 deletions lib/cuda/context/primary.jl
@@ -23,8 +23,8 @@ end
Retain the primary context on the GPU, returning a context compatible with the driver API.
The primary context will be released when the returned driver context is finalized.
As these contexts are refcounted by CUDA, you should not call [`unsafe_destroy!`](@ref) on
them but use [`unsafe_release!`](@ref) instead (available with do-block syntax as well).
As these contexts are refcounted by CUDA, you should not call [`CUDA.unsafe_destroy!`](@ref) on
them but use [`CUDA.unsafe_release!`](@ref) instead (available with do-block syntax as well).
"""
function CuContext(pctx::CuPrimaryContext)
handle = Ref{CUcontext}()
@@ -33,7 +33,7 @@ function CuContext(pctx::CuPrimaryContext)
end

"""
unsafe_release!(ctx::CuContext)
CUDA.unsafe_release!(ctx::CuContext)
Lower the refcount of a context, possibly freeing up all resources associated with it. This
does not respect any users of the context, and might make other objects unusable.
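For reference, a minimal sketch of the retain/release pattern documented above (it assumes a CUDA-capable device 0; only the calls named in these docstrings are used):

```julia
using CUDA

dev  = CuDevice(0)
pctx = CuPrimaryContext(dev)

ctx = CuContext(pctx)        # retain the primary context through the driver API
# ... use ctx as the current context ...
CUDA.unsafe_release!(ctx)    # lower the refcount rather than destroying the context
```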
2 changes: 1 addition & 1 deletion lib/cuda/module/linker.jl
@@ -1,7 +1,7 @@
# Linking of different PTX modules

export
CuLink, add_data!, add_file!, complete
CuLink, CuLinkImage, add_data!, add_file!, complete


"""
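As a rough sketch of how these exports fit together (assuming `ptx` already holds PTX source as a `String`, and that a `CuModule` can be built from the completed image):

```julia
using CUDA

link = CuLink()                   # start a pending link job
add_data!(link, "kernel", ptx)    # add PTX source under a name
img  = complete(link)             # finish linking, yielding a CuLinkImage
mod  = CuModule(img)              # load the linked image as a module
```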
2 changes: 1 addition & 1 deletion lib/cuda/types.jl
@@ -1,4 +1,4 @@
export CuDim
export CuDim3, CuDim

"""
CuDim3(x)
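The newly exported `CuDim3` normalizes the ways launch dimensions can be specified; a quick sketch, assuming integers and 1- to 3-element tuples are accepted:

```julia
using CUDA

CuDim3(4)          # x=4, y=1, z=1
CuDim3((2, 3))     # x=2, y=3, z=1
CuDim3((2, 3, 4))  # x=2, y=3, z=4
```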
2 changes: 1 addition & 1 deletion src/compiler/execution.jl
@@ -153,7 +153,7 @@ abstract type AbstractKernel{F,TT} end
(::DeviceKernel)(args...; kwargs...)
Low-level interface to call a compiled kernel, passing GPU-compatible arguments in `args`.
For a higher-level interface, use [`CUDA.@cuda`](@ref).
For a higher-level interface, use [`@cuda`](@ref).
The following keyword arguments are supported:
- `threads` (defaults to 1)
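A sketch of the kernel-object call interface described above, assuming the `launch=false` form of `@cuda` to obtain a compiled kernel before launching it with explicit keyword arguments:

```julia
using CUDA

function vadd(a, b, c)
    i = threadIdx().x
    c[i] = a[i] + b[i]
    return
end

a, b = CUDA.ones(16), CUDA.ones(16)
c = similar(a)

kernel = @cuda launch=false vadd(a, b, c)   # compile without launching
kernel(a, b, c; threads=16)                 # launch through the call interface
```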
7 changes: 4 additions & 3 deletions src/device/cuda/output.jl
@@ -169,8 +169,8 @@ pointers. For more complex output, use `@cuprintf` directly.
Limited string interpolation is also possible:
```julia
@cuprint("Hello, World ", 42, "\n")
@cuprint "Hello, World \$(42)\n"
@cuprint("Hello, World ", 42, "\\n")
@cuprint "Hello, World \$(42)\\n"
```
"""
macro cuprint(parts...)
@@ -217,7 +217,8 @@ export @cushow
"""
@cushow(ex)
GPU analog of `Base.@show`. It comes with the same type restrictions as [@cuprint](@ref).
GPU analog of `Base.@show`. It comes with the same type restrictions as [`@cuprintf`](@ref).
```julia
@cushow threadIdx().x
```
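A small sketch of the printing macros documented here, run from a throwaway kernel (thread output ordering is not guaranteed):

```julia
using CUDA

function hello()
    @cuprint("thread ", threadIdx().x, " of ", blockDim().x, "\n")
    @cushow threadIdx().x        # prints e.g. "threadIdx().x = 1"
    return
end

@cuda threads=2 hello()
synchronize()                    # wait so the device output gets flushed
```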
60 changes: 30 additions & 30 deletions src/device/cuda/wmma.jl
@@ -83,7 +83,7 @@ end
# -----------

@doc """
llvm_wmma_load_{matrix}_{layout}_{shape}_{addr_space}_stride_{elem_type}(src_addr, stride)
WMMA.llvm_wmma_load_{matrix}_{layout}_{shape}_{addr_space}_stride_{elem_type}(src_addr, stride)
Wrapper around the LLVM intrinsic `@llvm.nvvm.wmma.load.{matrix}.sync.{layout}.{shape}.{addr_space}.stride.{elem_type}`.
@@ -141,7 +141,7 @@ end
# ------------

@doc """
llvm_wmma_store_d_{layout}_{shape}_{addr_space}_stride_{elem_type}(dst_addr, data, stride)
WMMA.llvm_wmma_store_d_{layout}_{shape}_{addr_space}_stride_{elem_type}(dst_addr, data, stride)
Wrapper around the LLVM intrinsic `@llvm.nvvm.wmma.store.d.sync.{layout}.{shape}.{addr_space}.stride.{elem_type}`.
@@ -195,7 +195,7 @@ end
# --------------------------

@doc """
llvm_wmma_mma_{a_layout}_{b_layout}_{shape}_{d_elem_type}_{c_elem_type}(a, b, c)
WMMA.llvm_wmma_mma_{a_layout}_{b_layout}_{shape}_{d_elem_type}_{c_elem_type}(a, b, c)
Wrapper around the LLVM intrinsic `@llvm.nvvm.wmma.mma.sync.{a_layout}.{b_layout}.{shape}.{d_elem_type}.{c_elem_type}`.
@@ -304,30 +304,30 @@ end
export FragmentLayout, RowMajor, ColMajor, Unspecified

"""
FragmentLayout
WMMA.FragmentLayout
Abstract type that specifies the storage layout of a matrix.
Possible values are [`RowMajor`](@ref), [`ColMajor`](@ref) and [`Unspecified`](@ref).
Possible values are [`WMMA.RowMajor`](@ref), [`WMMA.ColMajor`](@ref) and [`WMMA.Unspecified`](@ref).
"""
abstract type FragmentLayout end

"""
RowMajor
WMMA.RowMajor
Type that represents a matrix stored in row major (C style) order.
"""
struct RowMajor <: FragmentLayout end

"""
ColMajor
WMMA.ColMajor
Type that represents a matrix stored in column major (Julia style) order.
"""
struct ColMajor <: FragmentLayout end

"""
Unspecified
WMMA.Unspecified
Type that represents a matrix stored in an unspecified order.
@@ -349,7 +349,7 @@ struct Accumulator <: FragmentUse end
export Fragment

"""
Fragment
WMMA.Fragment
Type that represents per-thread intermediate results of WMMA operations.
@@ -374,7 +374,7 @@ end
export Config

"""
Config{M, N, K, d_type}
WMMA.Config{M, N, K, d_type}
Type that contains all information for WMMA operations that cannot be inferred from the argument's types.
@@ -483,19 +483,19 @@ end
export load_a, load_b, load_c

"""
load_a(addr, stride, layout, config)
load_b(addr, stride, layout, config)
load_c(addr, stride, layout, config)
WMMA.load_a(addr, stride, layout, config)
WMMA.load_b(addr, stride, layout, config)
WMMA.load_c(addr, stride, layout, config)
Load the matrix `a`, `b` or `c` from the memory location indicated by `addr`, and return the resulting [`Fragment`](@ref).
Load the matrix `a`, `b` or `c` from the memory location indicated by `addr`, and return the resulting [`WMMA.Fragment`](@ref).
# Arguments
- `addr`: The address to load the matrix from.
- `stride`: The leading dimension of the matrix pointed to by `addr`, specified in number of elements.
- `layout`: The storage layout of the matrix. Possible values are [`RowMajor`](@ref) and [`ColMajor`](@ref).
- `config`: The WMMA configuration that should be used for loading this matrix. See [`Config`](@ref).
- `layout`: The storage layout of the matrix. Possible values are [`WMMA.RowMajor`](@ref) and [`WMMA.ColMajor`](@ref).
- `config`: The WMMA configuration that should be used for loading this matrix. See [`WMMA.Config`](@ref).
See also: [`Fragment`](@ref), [`FragmentLayout`](@ref), [`Config`](@ref)
See also: [`WMMA.Fragment`](@ref), [`WMMA.FragmentLayout`](@ref), [`WMMA.Config`](@ref)
!!! warning
@@ -537,16 +537,16 @@ end
export mma

"""
mma(a, b, c, conf)
WMMA.mma(a, b, c, conf)
Perform the matrix multiply-accumulate operation ``D = A \\cdot B + C``.
# Arguments
- `a`: The [`Fragment`](@ref) corresponding to the matrix ``A``.
- `b`: The [`Fragment`](@ref) corresponding to the matrix ``B``.
- `c`: The [`Fragment`](@ref) corresponding to the matrix ``C``.
- `conf`: The [`Config`](@ref) that should be used in this WMMA operation.
- `a`: The [`WMMA.Fragment`](@ref) corresponding to the matrix ``A``.
- `b`: The [`WMMA.Fragment`](@ref) corresponding to the matrix ``B``.
- `c`: The [`WMMA.Fragment`](@ref) corresponding to the matrix ``C``.
- `conf`: The [`WMMA.Config`](@ref) that should be used in this WMMA operation.
!!! warning
@@ -590,18 +590,18 @@ end
export store_d

"""
store_d(addr, d, stride, layout, config)
WMMA.store_d(addr, d, stride, layout, config)
Store the result matrix `d` to the memory location indicated by `addr`.
# Arguments
- `addr`: The address to store the matrix to.
- `d`: The [`Fragment`](@ref) corresponding to the `d` matrix.
- `d`: The [`WMMA.Fragment`](@ref) corresponding to the `d` matrix.
- `stride`: The leading dimension of the matrix pointed to by `addr`, specified in number of elements.
- `layout`: The storage layout of the matrix. Possible values are [`RowMajor`](@ref) and [`ColMajor`](@ref).
- `config`: The WMMA configuration that should be used for storing this matrix. See [`Config`](@ref).
- `layout`: The storage layout of the matrix. Possible values are [`WMMA.RowMajor`](@ref) and [`WMMA.ColMajor`](@ref).
- `config`: The WMMA configuration that should be used for storing this matrix. See [`WMMA.Config`](@ref).
See also: [`Fragment`](@ref), [`FragmentLayout`](@ref), [`Config`](@ref)
See also: [`WMMA.Fragment`](@ref), [`WMMA.FragmentLayout`](@ref), [`WMMA.Config`](@ref)
!!! warning
@@ -639,15 +639,15 @@ end
export fill_c

"""
fill_c(value, config)
WMMA.fill_c(value, config)
Return a [`Fragment`](@ref) filled with the value `value`.
Return a [`WMMA.Fragment`](@ref) filled with the value `value`.
This operation is useful if you want to implement a matrix multiplication (and thus want to set ``C = O``).
# Arguments
- `value`: The value used to fill the fragment. Can be a `Float16` or `Float32`.
- `config`: The WMMA configuration that should be used for this WMMA operation. See [`Config`](@ref).
- `config`: The WMMA configuration that should be used for this WMMA operation. See [`WMMA.Config`](@ref).
"""
fill_c

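Tying the high-level API in this file together, a minimal 16×16×16 sketch (it assumes the `WMMA` submodule is reachable unqualified after `using CUDA`, and a GPU with Tensor Cores):

```julia
using CUDA

function wmma_kernel(a, b, d)
    conf   = WMMA.Config{16, 16, 16, Float32}
    a_frag = WMMA.load_a(pointer(a), 16, WMMA.ColMajor, conf)
    b_frag = WMMA.load_b(pointer(b), 16, WMMA.ColMajor, conf)
    c_frag = WMMA.fill_c(Float32(0), conf)          # start from C = 0
    d_frag = WMMA.mma(a_frag, b_frag, c_frag, conf)
    WMMA.store_d(pointer(d), d_frag, 16, WMMA.ColMajor, conf)
    return
end

a = CUDA.rand(Float16, 16, 16)
b = CUDA.rand(Float16, 16, 16)
d = CUDA.zeros(Float32, 16, 16)

@cuda threads=32 wmma_kernel(a, b, d)   # WMMA operates on a full warp
```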
2 changes: 1 addition & 1 deletion src/initialization.jl
@@ -128,7 +128,7 @@ end

# TODO: update docstrings

export has_cuda, has_cuda_gpu, usable_cuda_gpus
export has_cuda, has_cuda_gpu

"""
has_cuda()::Bool
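These exports are typically used to guard GPU code paths, e.g.:

```julia
using CUDA

if has_cuda_gpu()
    x = CUDA.rand(1024)
    @show sum(x)
else
    @warn "no usable CUDA GPU found, staying on the CPU"
end
```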
28 changes: 14 additions & 14 deletions test/device/wmma.jl
@@ -149,23 +149,23 @@ end

@testset "Flattening/unflattening" begin
@testset "Flattening" begin
@test CUDA.WMMA.flatten(5) == (5,)
@test CUDA.WMMA.flatten(5.0) == (5.0,)
@test CUDA.WMMA.flatten(VecElement{Float16}(5)) == (Float16(5),)
@test CUDA.WMMA.flatten(ntuple(i -> i, 8)) == ntuple(i -> i, 8)
@test CUDA.WMMA.flatten(ntuple(i -> VecElement{Float16}(i), 8)) == ntuple(i -> Float16(i), 8)
@test CUDA.WMMA.flatten(ntuple(i -> ntuple(j -> (i-1) * 2 + j, 2), 8)) == ntuple(i -> i, 2 * 8)
@test CUDA.WMMA.flatten(ntuple(i -> ntuple(j -> VecElement{Float16}((i-1) * 2 + j), 2), 8)) == ntuple(i -> Float16(i), 2 * 8)
@test WMMA.flatten(5) == (5,)
@test WMMA.flatten(5.0) == (5.0,)
@test WMMA.flatten(VecElement{Float16}(5)) == (Float16(5),)
@test WMMA.flatten(ntuple(i -> i, 8)) == ntuple(i -> i, 8)
@test WMMA.flatten(ntuple(i -> VecElement{Float16}(i), 8)) == ntuple(i -> Float16(i), 8)
@test WMMA.flatten(ntuple(i -> ntuple(j -> (i-1) * 2 + j, 2), 8)) == ntuple(i -> i, 2 * 8)
@test WMMA.flatten(ntuple(i -> ntuple(j -> VecElement{Float16}((i-1) * 2 + j), 2), 8)) == ntuple(i -> Float16(i), 2 * 8)
end

@testset "Unflattening" begin
@test CUDA.WMMA.unflatten(Int64, (5,)) == 5
@test CUDA.WMMA.unflatten(Float64, (5.0,)) == 5.0
@test CUDA.WMMA.unflatten(VecElement{Float16}, (Float16(5),)) == VecElement{Float16}(5)
@test CUDA.WMMA.unflatten(NTuple{8, Int64}, ntuple(i -> i, 8)) == ntuple(i -> i, 8)
@test CUDA.WMMA.unflatten(NTuple{8, VecElement{Float16}}, ntuple(i -> Float16(i), 8)) == ntuple(i -> VecElement{Float16}(i), 8)
@test CUDA.WMMA.unflatten(NTuple{8, NTuple{2, Int64}}, ntuple(i -> i, 2 * 8)) == ntuple(i -> ntuple(j -> (i-1) * 2 + j, 2), 8)
@test CUDA.WMMA.unflatten(NTuple{8, NTuple{2, VecElement{Float16}}}, ntuple(i -> Float16(i), 2 * 8)) == ntuple(i -> ntuple(j -> VecElement{Float16}((i-1) * 2 + j), 2), 8)
@test WMMA.unflatten(Int64, (5,)) == 5
@test WMMA.unflatten(Float64, (5.0,)) == 5.0
@test WMMA.unflatten(VecElement{Float16}, (Float16(5),)) == VecElement{Float16}(5)
@test WMMA.unflatten(NTuple{8, Int64}, ntuple(i -> i, 8)) == ntuple(i -> i, 8)
@test WMMA.unflatten(NTuple{8, VecElement{Float16}}, ntuple(i -> Float16(i), 8)) == ntuple(i -> VecElement{Float16}(i), 8)
@test WMMA.unflatten(NTuple{8, NTuple{2, Int64}}, ntuple(i -> i, 2 * 8)) == ntuple(i -> ntuple(j -> (i-1) * 2 + j, 2), 8)
@test WMMA.unflatten(NTuple{8, NTuple{2, VecElement{Float16}}}, ntuple(i -> Float16(i), 2 * 8)) == ntuple(i -> ntuple(j -> VecElement{Float16}((i-1) * 2 + j), 2), 8)
end
end
