Fix-up some docstrings.
maleadt committed May 26, 2020
1 parent ddaf4cb commit 7c269ae
Showing 8 changed files with 55 additions and 54 deletions.
6 changes: 3 additions & 3 deletions lib/cuda/context/primary.jl
@@ -23,8 +23,8 @@ end
Retain the primary context on the GPU, returning a context compatible with the driver API.
The primary context will be released when the returned driver context is finalized.
As these contexts are refcounted by CUDA, you should not call [`unsafe_destroy!`](@ref) on
them but use [`unsafe_release!`](@ref) instead (available with do-block syntax as well).
As these contexts are refcounted by CUDA, you should not call [`CUDA.unsafe_destroy!`](@ref) on
them but use [`CUDA.unsafe_release!`](@ref) instead (available with do-block syntax as well).
"""
function CuContext(pctx::CuPrimaryContext)
handle = Ref{CUcontext}()
@@ -33,7 +33,7 @@ function CuContext(pctx::CuPrimaryContext)
end

"""
unsafe_release!(ctx::CuContext)
CUDA.unsafe_release!(ctx::CuContext)
Lower the refcount of a context, possibly freeing up all resources associated with it. This
does not respect any users of the context, and might make other objects unusable.
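For reference, a minimal sketch of the retain/release pattern documented above (it assumes a CUDA-capable device 0; only the calls named in these docstrings are used):

```julia
using CUDA

dev  = CuDevice(0)
pctx = CuPrimaryContext(dev)

ctx = CuContext(pctx)        # retain the primary context through the driver API
# ... use ctx as the current context ...
CUDA.unsafe_release!(ctx)    # lower the refcount rather than destroying the context
```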
2 changes: 1 addition & 1 deletion lib/cuda/module/linker.jl
@@ -1,7 +1,7 @@
# Linking of different PTX modules

export
CuLink, add_data!, add_file!, complete
CuLink, CuLinkImage, add_data!, add_file!, complete


"""
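As a rough sketch of how these exports fit together (assuming `ptx` already holds PTX source as a `String`, and that a `CuModule` can be built from the completed image):

```julia
using CUDA

link = CuLink()                   # start a pending link job
add_data!(link, "kernel", ptx)    # add PTX source under a name
img  = complete(link)             # finish linking, yielding a CuLinkImage
mod  = CuModule(img)              # load the linked image as a module
```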
2 changes: 1 addition & 1 deletion lib/cuda/types.jl
@@ -1,4 +1,4 @@
export CuDim
export CuDim3, CuDim

"""
CuDim3(x)
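The newly exported `CuDim3` normalizes the ways launch dimensions can be specified; a quick sketch, assuming integers and 1- to 3-element tuples are accepted:

```julia
using CUDA

CuDim3(4)          # x=4, y=1, z=1
CuDim3((2, 3))     # x=2, y=3, z=1
CuDim3((2, 3, 4))  # x=2, y=3, z=4
```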
2 changes: 1 addition & 1 deletion src/compiler/execution.jl
@@ -153,7 +153,7 @@ abstract type AbstractKernel{F,TT} end
(::DeviceKernel)(args...; kwargs...)
Low-level interface to call a compiled kernel, passing GPU-compatible arguments in `args`.
For a higher-level interface, use [`CUDA.@cuda`](@ref).
For a higher-level interface, use [`@cuda`](@ref).
The following keyword arguments are supported:
- `threads` (defaults to 1)
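A sketch of the kernel-object call interface described above, assuming the `launch=false` form of `@cuda` to obtain a compiled kernel before launching it with explicit keyword arguments:

```julia
using CUDA

function vadd(a, b, c)
    i = threadIdx().x
    c[i] = a[i] + b[i]
    return
end

a, b = CUDA.ones(16), CUDA.ones(16)
c = similar(a)

kernel = @cuda launch=false vadd(a, b, c)   # compile without launching
kernel(a, b, c; threads=16)                 # launch through the call interface
```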
7 changes: 4 additions & 3 deletions src/device/cuda/output.jl
@@ -169,8 +169,8 @@ pointers. For more complex output, use `@cuprintf` directly.
Limited string interpolation is also possible:
```julia
@cuprint("Hello, World ", 42, "\n")
@cuprint "Hello, World \$(42)\n"
@cuprint("Hello, World ", 42, "\\n")
@cuprint "Hello, World \$(42)\\n"
```
"""
macro cuprint(parts...)
@@ -217,7 +217,8 @@ export @cushow
"""
@cushow(ex)
GPU analog of `Base.@show`. It comes with the same type restrictions as [@cuprint](@ref).
GPU analog of `Base.@show`. It comes with the same type restrictions as [`@cuprintf`](@ref).
```julia
@cushow threadIdx().x
```
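A small sketch of the printing macros documented here, run from a throwaway kernel (thread output ordering is not guaranteed):

```julia
using CUDA

function hello()
    @cuprint("thread ", threadIdx().x, " of ", blockDim().x, "\n")
    @cushow threadIdx().x        # prints e.g. "threadIdx().x = 1"
    return
end

@cuda threads=2 hello()
synchronize()                    # wait so the device output gets flushed
```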
60 changes: 30 additions & 30 deletions src/device/cuda/wmma.jl
@@ -83,7 +83,7 @@ end
# -----------

@doc """
llvm_wmma_load_{matrix}_{layout}_{shape}_{addr_space}_stride_{elem_type}(src_addr, stride)
WMMA.llvm_wmma_load_{matrix}_{layout}_{shape}_{addr_space}_stride_{elem_type}(src_addr, stride)
Wrapper around the LLVM intrinsic `@llvm.nvvm.wmma.load.{matrix}.sync.{layout}.{shape}.{addr_space}.stride.{elem_type}`.
@@ -141,7 +141,7 @@ end
# ------------

@doc """
llvm_wmma_store_d_{layout}_{shape}_{addr_space}_stride_{elem_type}(dst_addr, data, stride)
WMMA.llvm_wmma_store_d_{layout}_{shape}_{addr_space}_stride_{elem_type}(dst_addr, data, stride)
Wrapper around the LLVM intrinsic `@llvm.nvvm.wmma.store.d.sync.{layout}.{shape}.{addr_space}.stride.{elem_type}`.
@@ -195,7 +195,7 @@ end
# --------------------------

@doc """
llvm_wmma_mma_{a_layout}_{b_layout}_{shape}_{d_elem_type}_{c_elem_type}(a, b, c)
WMMA.llvm_wmma_mma_{a_layout}_{b_layout}_{shape}_{d_elem_type}_{c_elem_type}(a, b, c)
Wrapper around the LLVM intrinsic `@llvm.nvvm.wmma.mma.sync.{a_layout}.{b_layout}.{shape}.{d_elem_type}.{c_elem_type}`.
@@ -304,30 +304,30 @@ end
export FragmentLayout, RowMajor, ColMajor, Unspecified

"""
FragmentLayout
WMMA.FragmentLayout
Abstract type that specifies the storage layout of a matrix.
Possible values are [`RowMajor`](@ref), [`ColMajor`](@ref) and [`Unspecified`](@ref).
Possible values are [`WMMA.RowMajor`](@ref), [`WMMA.ColMajor`](@ref) and [`WMMA.Unspecified`](@ref).
"""
abstract type FragmentLayout end

"""
RowMajor
WMMA.RowMajor
Type that represents a matrix stored in row major (C style) order.
"""
struct RowMajor <: FragmentLayout end

"""
ColMajor
WMMA.ColMajor
Type that represents a matrix stored in column major (Julia style) order.
"""
struct ColMajor <: FragmentLayout end

"""
Unspecified
WMMA.Unspecified
Type that represents a matrix stored in an unspecified order.
@@ -349,7 +349,7 @@ struct Accumulator <: FragmentUse end
export Fragment

"""
Fragment
WMMA.Fragment
Type that represents per-thread intermediate results of WMMA operations.
@@ -374,7 +374,7 @@ end
export Config

"""
Config{M, N, K, d_type}
WMMA.Config{M, N, K, d_type}
Type that contains all information for WMMA operations that cannot be inferred from the argument's types.
@@ -483,19 +483,19 @@ end
export load_a, load_b, load_c

"""
load_a(addr, stride, layout, config)
load_b(addr, stride, layout, config)
load_c(addr, stride, layout, config)
WMMA.load_a(addr, stride, layout, config)
WMMA.load_b(addr, stride, layout, config)
WMMA.load_c(addr, stride, layout, config)
Load the matrix `a`, `b` or `c` from the memory location indicated by `addr`, and return the resulting [`Fragment`](@ref).
Load the matrix `a`, `b` or `c` from the memory location indicated by `addr`, and return the resulting [`WMMA.Fragment`](@ref).
# Arguments
- `addr`: The address to load the matrix from.
- `stride`: The leading dimension of the matrix pointed to by `addr`, specified in number of elements.
- `layout`: The storage layout of the matrix. Possible values are [`RowMajor`](@ref) and [`ColMajor`](@ref).
- `config`: The WMMA configuration that should be used for loading this matrix. See [`Config`](@ref).
- `layout`: The storage layout of the matrix. Possible values are [`WMMA.RowMajor`](@ref) and [`WMMA.ColMajor`](@ref).
- `config`: The WMMA configuration that should be used for loading this matrix. See [`WMMA.Config`](@ref).
See also: [`Fragment`](@ref), [`FragmentLayout`](@ref), [`Config`](@ref)
See also: [`WMMA.Fragment`](@ref), [`WMMA.FragmentLayout`](@ref), [`WMMA.Config`](@ref)
!!! warning
@@ -537,16 +537,16 @@ end
export mma

"""
mma(a, b, c, conf)
WMMA.mma(a, b, c, conf)
Perform the matrix multiply-accumulate operation ``D = A \\cdot B + C``.
# Arguments
- `a`: The [`Fragment`](@ref) corresponding to the matrix ``A``.
- `b`: The [`Fragment`](@ref) corresponding to the matrix ``B``.
- `c`: The [`Fragment`](@ref) corresponding to the matrix ``C``.
- `conf`: The [`Config`](@ref) that should be used in this WMMA operation.
- `a`: The [`WMMA.Fragment`](@ref) corresponding to the matrix ``A``.
- `b`: The [`WMMA.Fragment`](@ref) corresponding to the matrix ``B``.
- `c`: The [`WMMA.Fragment`](@ref) corresponding to the matrix ``C``.
- `conf`: The [`WMMA.Config`](@ref) that should be used in this WMMA operation.
!!! warning
@@ -590,18 +590,18 @@ end
export store_d

"""
store_d(addr, d, stride, layout, config)
WMMA.store_d(addr, d, stride, layout, config)
Store the result matrix `d` to the memory location indicated by `addr`.
# Arguments
- `addr`: The address to store the matrix to.
- `d`: The [`Fragment`](@ref) corresponding to the `d` matrix.
- `d`: The [`WMMA.Fragment`](@ref) corresponding to the `d` matrix.
- `stride`: The leading dimension of the matrix pointed to by `addr`, specified in number of elements.
- `layout`: The storage layout of the matrix. Possible values are [`RowMajor`](@ref) and [`ColMajor`](@ref).
- `config`: The WMMA configuration that should be used for storing this matrix. See [`Config`](@ref).
- `layout`: The storage layout of the matrix. Possible values are [`WMMA.RowMajor`](@ref) and [`WMMA.ColMajor`](@ref).
- `config`: The WMMA configuration that should be used for storing this matrix. See [`WMMA.Config`](@ref).
See also: [`Fragment`](@ref), [`FragmentLayout`](@ref), [`Config`](@ref)
See also: [`WMMA.Fragment`](@ref), [`WMMA.FragmentLayout`](@ref), [`WMMA.Config`](@ref)
!!! warning
@@ -639,15 +639,15 @@ end
export fill_c

"""
fill_c(value, config)
WMMA.fill_c(value, config)
Return a [`Fragment`](@ref) filled with the value `value`.
Return a [`WMMA.Fragment`](@ref) filled with the value `value`.
This operation is useful if you want to implement a matrix multiplication (and thus want to set ``C = O``).
# Arguments
- `value`: The value used to fill the fragment. Can be a `Float16` or `Float32`.
- `config`: The WMMA configuration that should be used for this WMMA operation. See [`Config`](@ref).
- `config`: The WMMA configuration that should be used for this WMMA operation. See [`WMMA.Config`](@ref).
"""
fill_c

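Tying the high-level API in this file together, a minimal 16×16×16 sketch (it assumes the `WMMA` submodule is reachable unqualified after `using CUDA`, and a GPU with Tensor Cores):

```julia
using CUDA

function wmma_kernel(a, b, d)
    conf   = WMMA.Config{16, 16, 16, Float32}
    a_frag = WMMA.load_a(pointer(a), 16, WMMA.ColMajor, conf)
    b_frag = WMMA.load_b(pointer(b), 16, WMMA.ColMajor, conf)
    c_frag = WMMA.fill_c(Float32(0), conf)          # start from C = 0
    d_frag = WMMA.mma(a_frag, b_frag, c_frag, conf)
    WMMA.store_d(pointer(d), d_frag, 16, WMMA.ColMajor, conf)
    return
end

a = CUDA.rand(Float16, 16, 16)
b = CUDA.rand(Float16, 16, 16)
d = CUDA.zeros(Float32, 16, 16)

@cuda threads=32 wmma_kernel(a, b, d)   # WMMA operates on a full warp
```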
2 changes: 1 addition & 1 deletion src/initialization.jl
@@ -128,7 +128,7 @@ end

# TODO: update docstrings

export has_cuda, has_cuda_gpu, usable_cuda_gpus
export has_cuda, has_cuda_gpu

"""
has_cuda()::Bool
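These exports are typically used to guard GPU code paths, e.g.:

```julia
using CUDA

if has_cuda_gpu()
    x = CUDA.rand(1024)
    @show sum(x)
else
    @warn "no usable CUDA GPU found, staying on the CPU"
end
```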
28 changes: 14 additions & 14 deletions test/device/wmma.jl
@@ -149,23 +149,23 @@ end

@testset "Flattening/unflattening" begin
@testset "Flattening" begin
@test CUDA.WMMA.flatten(5) == (5,)
@test CUDA.WMMA.flatten(5.0) == (5.0,)
@test CUDA.WMMA.flatten(VecElement{Float16}(5)) == (Float16(5),)
@test CUDA.WMMA.flatten(ntuple(i -> i, 8)) == ntuple(i -> i, 8)
@test CUDA.WMMA.flatten(ntuple(i -> VecElement{Float16}(i), 8)) == ntuple(i -> Float16(i), 8)
@test CUDA.WMMA.flatten(ntuple(i -> ntuple(j -> (i-1) * 2 + j, 2), 8)) == ntuple(i -> i, 2 * 8)
@test CUDA.WMMA.flatten(ntuple(i -> ntuple(j -> VecElement{Float16}((i-1) * 2 + j), 2), 8)) == ntuple(i -> Float16(i), 2 * 8)
@test WMMA.flatten(5) == (5,)
@test WMMA.flatten(5.0) == (5.0,)
@test WMMA.flatten(VecElement{Float16}(5)) == (Float16(5),)
@test WMMA.flatten(ntuple(i -> i, 8)) == ntuple(i -> i, 8)
@test WMMA.flatten(ntuple(i -> VecElement{Float16}(i), 8)) == ntuple(i -> Float16(i), 8)
@test WMMA.flatten(ntuple(i -> ntuple(j -> (i-1) * 2 + j, 2), 8)) == ntuple(i -> i, 2 * 8)
@test WMMA.flatten(ntuple(i -> ntuple(j -> VecElement{Float16}((i-1) * 2 + j), 2), 8)) == ntuple(i -> Float16(i), 2 * 8)
end

@testset "Unflattening" begin
@test CUDA.WMMA.unflatten(Int64, (5,)) == 5
@test CUDA.WMMA.unflatten(Float64, (5.0,)) == 5.0
@test CUDA.WMMA.unflatten(VecElement{Float16}, (Float16(5),)) == VecElement{Float16}(5)
@test CUDA.WMMA.unflatten(NTuple{8, Int64}, ntuple(i -> i, 8)) == ntuple(i -> i, 8)
@test CUDA.WMMA.unflatten(NTuple{8, VecElement{Float16}}, ntuple(i -> Float16(i), 8)) == ntuple(i -> VecElement{Float16}(i), 8)
@test CUDA.WMMA.unflatten(NTuple{8, NTuple{2, Int64}}, ntuple(i -> i, 2 * 8)) == ntuple(i -> ntuple(j -> (i-1) * 2 + j, 2), 8)
@test CUDA.WMMA.unflatten(NTuple{8, NTuple{2, VecElement{Float16}}}, ntuple(i -> Float16(i), 2 * 8)) == ntuple(i -> ntuple(j -> VecElement{Float16}((i-1) * 2 + j), 2), 8)
@test WMMA.unflatten(Int64, (5,)) == 5
@test WMMA.unflatten(Float64, (5.0,)) == 5.0
@test WMMA.unflatten(VecElement{Float16}, (Float16(5),)) == VecElement{Float16}(5)
@test WMMA.unflatten(NTuple{8, Int64}, ntuple(i -> i, 8)) == ntuple(i -> i, 8)
@test WMMA.unflatten(NTuple{8, VecElement{Float16}}, ntuple(i -> Float16(i), 8)) == ntuple(i -> VecElement{Float16}(i), 8)
@test WMMA.unflatten(NTuple{8, NTuple{2, Int64}}, ntuple(i -> i, 2 * 8)) == ntuple(i -> ntuple(j -> (i-1) * 2 + j, 2), 8)
@test WMMA.unflatten(NTuple{8, NTuple{2, VecElement{Float16}}}, ntuple(i -> Float16(i), 2 * 8)) == ntuple(i -> ntuple(j -> VecElement{Float16}((i-1) * 2 + j), 2), 8)
end
end
