Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add copyto! and similar for Datasets #937

Merged
merged 9 commits into from
Jun 7, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
268 changes: 226 additions & 42 deletions src/HDF5.jl
Original file line number Diff line number Diff line change
Expand Up @@ -968,10 +968,32 @@ function Base.read(obj::DatasetOrAttribute, ::Type{String}, I...)
return val
end

"""
copyto!(output_buffer::AbstractArray{T}, obj::Union{DatasetOrAttribute}) where T

Copy [part of] a HDF5 dataset or attribute to a preallocated output buffer.
The output buffer must be convertible to a pointer and have a contiguous layout.
"""
function Base.copyto!(output_buffer::AbstractArray{T}, obj::DatasetOrAttribute, I...) where T
dtype = datatype(obj)
val = nothing
try
val = generic_read!(output_buffer, obj, dtype, T, I...)
finally
close(dtype)
end
return val
end

# Special handling for reading OPAQUE datasets and attributes into a
# caller-provided byte buffer: one column of `sizeof(filetype)` bytes per element.
function generic_read!(buf::Matrix{UInt8}, obj::DatasetOrAttribute, filetype::Datatype, ::Type{Opaque})
    generic_read(obj, filetype, Opaque, buf)
end
function generic_read(obj::DatasetOrAttribute, filetype::Datatype, ::Type{Opaque}, buf::Union{Matrix{UInt8}, Nothing} = nothing)
sz = size(obj)
buf = Matrix{UInt8}(undef, sizeof(filetype), prod(sz))
if isnothing(buf)
buf = Matrix{UInt8}(undef, sizeof(filetype), prod(sz))
end
if obj isa Dataset
read_dataset(obj, filetype, buf, obj.xfer)
else
Expand All @@ -989,11 +1011,164 @@ function generic_read(obj::DatasetOrAttribute, filetype::Datatype, ::Type{Opaque
end

# generic read function
# In-place variant: read into a caller-provided buffer. `buf` is either an
# element-typed array or a raw byte matrix (for normalized types such as strings).
function generic_read!(buf::Union{AbstractMatrix{UInt8}, AbstractArray{T}}, obj::DatasetOrAttribute, filetype::Datatype, ::Type{T}, I...) where T
    return _generic_read(obj, filetype, T, buf, I...)
end
# Allocating variant: `_generic_read` allocates the buffer when passed `nothing`.
function generic_read(obj::DatasetOrAttribute, filetype::Datatype, ::Type{T}, I...) where T
    return _generic_read(obj, filetype, T, nothing, I...)
end
# Shared implementation behind `generic_read` / `generic_read!`.
# Reads [a hyperslab `I...` of] `obj` as element type `T`, either into the
# provided buffer `buf` or into a freshly allocated one when `buf === nothing`.
# Returns a scalar when the selection is scalar, otherwise an array.
function _generic_read(obj::DatasetOrAttribute, filetype::Datatype, ::Type{T},
                       buf::Union{AbstractMatrix{UInt8}, AbstractArray{T}, Nothing}, I...) where T

    # `dspace` is owned by this function from here on and must be closed on
    # every exit path below.
    sz, scalar, dspace = _size_of_buffer(obj, I)

    if isempty(sz)
        # Null dataspace: nothing to read.
        close(dspace)
        return EmptyArray{T}()
    end

    try
        if isnothing(buf)
            buf = _normalized_buffer(T, sz)
        else
            # Validate the caller-provided buffer by total byte count, since it
            # may be either an `Array{T}` or a raw `Matrix{UInt8}`.
            sizeof(buf) != prod(sz)*sizeof(T) &&
                error("Provided array buffer of size, $(size(buf)), and element type, $(eltype(buf)), does not match the dataset of size, $sz, and type, $T")
        end
    catch err
        close(dspace)
        rethrow(err)
    end

    memtype = _memtype(filetype, T)
    # With no index selection the memory layout matches the file dataspace;
    # otherwise describe the in-memory extent of the selection.
    memspace = isempty(I) ? dspace : dataspace(sz)

    try
        if obj isa Dataset
            API.h5d_read(obj, memtype, memspace, dspace, obj.xfer, buf)
        else
            API.h5a_read(obj, memtype, buf)
        end

        if do_normalize(T)
            # Convert the raw byte buffer into proper Julia values (strings,
            # variable-length arrays, ...). See typeconversions.jl.
            out = reshape(normalize_types(T, buf), sz...)
        else
            out = buf
        end

        # Release HDF5-allocated variable-length memory after normalization.
        xfer_id = obj isa Dataset ? obj.xfer.id : API.H5P_DEFAULT
        do_reclaim(T) && API.h5d_vlen_reclaim(memtype, memspace, xfer_id, buf)

        if scalar
            return out[1]
        else
            return out
        end

    finally
        # NOTE(review): when `isempty(I)`, `memspace === dspace`; closing twice
        # is assumed to be a safe no-op for an already-closed handle.
        close(memtype)
        close(memspace)
        close(dspace)
    end
end


"""
similar(obj::DatasetOrAttribute, [::Type{T}], [dims::Integer...]; normalize = true)

Return a `Array{T}` or `Matrix{UInt8}` to that can contain [part of] the dataset.

The `normalize` keyword will normalize the buffer for string and array datatypes.
"""
function Base.similar(
obj::DatasetOrAttribute,
::Type{T},
dims::Dims;
normalize::Bool = true
) where T
filetype = datatype(obj)
try
return similar(obj, filetype, T, dims; normalize=normalize)
finally
close(filetype)
end
end
Base.similar(
obj::DatasetOrAttribute,
::Type{T},
dims::Integer...;
normalize::Bool = true
) where T = similar(obj, T, Int.(dims); normalize=normalize)

# Base.similar without specifying the Julia type
function Base.similar(obj::DatasetOrAttribute, dims::Dims; normalize::Bool = true)
filetype = datatype(obj)
try
T = get_jl_type(filetype)
return similar(obj, filetype, T, dims; normalize=normalize)
finally
close(filetype)
end
end
Base.similar(
obj::DatasetOrAttribute,
dims::Integer...;
normalize::Bool = true
) = similar(obj, Int.(dims); normalize=normalize)

# Opaque types
function Base.similar(obj::DatasetOrAttribute, filetype::Datatype, ::Type{Opaque}; normalize::Bool = true)
# normalize keyword for consistency, but it is ignored for Opaque
sz = size(obj)
return Matrix{UInt8}(undef, sizeof(filetype), prod(sz))
end

# Undocumented Base.similar signature allowing filetype to be specified
function Base.similar(
obj::DatasetOrAttribute,
filetype::Datatype,
::Type{T},
dims::Dims;
normalize::Bool = true
) where T
# We are reusing code that expect indices
I = Base.OneTo.(dims)
sz, scalar, dspace = _size_of_buffer(obj, I)
memtype = _memtype(filetype, T)
try
buf = _normalized_buffer(T, sz)

if normalize && do_normalize(T)
buf = reshape(normalize_types(T, buf), sz)
end

return buf
finally
close(dspace)
close(memtype)
end
end
Base.similar(
obj::DatasetOrAttribute,
filetype::Datatype,
::Type{T},
dims::Integer...;
normalize::Bool = true
) where T = similar(obj, filetype, T, Int.(dims); normalize=normalize)

# Utilities used in Base.similar implementation

#=
_memtype(filetype::Datatype, T)

This is a utility function originally factored out of generic_read.
It gets the native memory type for the system based on filetype, and checks
if the size matches.
=#
@inline function _memtype(filetype::Datatype, ::Type{T}) where T
!isconcretetype(T) && error("type $T is not concrete")
!isempty(I) && obj isa Attribute && error("HDF5 attributes do not support hyperslab selections")

memtype = Datatype(API.h5t_get_native_type(filetype)) # padded layout in memory
# padded layout in memory
memtype = Datatype(API.h5t_get_native_type(filetype))

if sizeof(T) != sizeof(memtype)
error("""
Expand All @@ -1003,11 +1178,37 @@ function generic_read(obj::DatasetOrAttribute, filetype::Datatype, ::Type{T}, I.
""")
end

dspace = dataspace(obj)
return memtype
end

#=
_size_of_buffer(obj::DatasetOrAttribute, [I::Tuple, dspace::Dataspace])

This is a utility function originally from generic_read, but factored out.
The primary purpose is to determine the size and shape of the buffer to
create in order to hold the contents of a Dataset or Attribute.

# Arguments
* obj - A Dataset or Attribute
* I - (optional) indices, defaults to ()
* dspace - (optional) dataspace, defaults to dataspace(obj).
This argument will be consumed by hyperslab and returned.

# Returns
* `sz` the size of the selection
* `scalar`, which is true if the value should be read as a scalar.
* `dspace`, the dataspace, possibly replaced by a hyperslab selection derived from `I`.
=#
@inline function _size_of_buffer(
obj::DatasetOrAttribute,
I::Tuple = (),
dspace::Dataspace = dataspace(obj)
)
!isempty(I) && obj isa Attribute && error("HDF5 attributes do not support hyperslab selections")

stype = API.h5s_get_simple_extent_type(dspace)
stype == API.H5S_NULL && return EmptyArray{T}()

if !isempty(I)
if !isempty(I) && stype != API.H5S_NULL
indices = Base.to_indices(obj, I)
dspace = hyperslab(dspace, indices...)
end
Expand All @@ -1016,16 +1217,32 @@ function generic_read(obj::DatasetOrAttribute, filetype::Datatype, ::Type{T}, I.
if stype == API.H5S_SCALAR
sz = (1,)
scalar = true
elseif stype == API.H5S_NULL
sz = ()
# scalar = false
elseif isempty(I)
sz = size(dspace)
# scalar = false
else
# Determine the size by the length of non-Int indices
sz = map(length, filter(i -> !isa(i, Int), indices))
if isempty(sz)
# All indices are Int, so this is scalar
sz = (1,)
scalar = true
end
end

return sz, scalar, dspace
end

#=
_normalized_buffer(T, sz)

Return a Matrix{UInt8} for a normalized type or `Array{T}` for a regular type.
See `do_normalize` in typeconversions.jl.
=#
@inline function _normalized_buffer(::Type{T}, sz::NTuple{N, Int}) where {T, N}
if do_normalize(T)
# The entire dataset is read into in a buffer matrix where the first dimension at
# any stage of normalization is the bytes for a single element of type `T`, and
Expand All @@ -1034,32 +1251,8 @@ function generic_read(obj::DatasetOrAttribute, filetype::Datatype, ::Type{T}, I.
else
buf = Array{T}(undef, sz...)
end
memspace = isempty(I) ? dspace : dataspace(sz)

if obj isa Dataset
API.h5d_read(obj, memtype, memspace, dspace, obj.xfer, buf)
else
API.h5a_read(obj, memtype, buf)
end

if do_normalize(T)
out = reshape(normalize_types(T, buf), sz...)
else
out = buf
end

xfer_id = obj isa Dataset ? obj.xfer.id : API.H5P_DEFAULT
do_reclaim(T) && API.h5d_vlen_reclaim(memtype, memspace, xfer_id, buf)

close(memtype)
close(memspace)
close(dspace)

if scalar
return out[1]
else
return out
end
return buf
end

# Array constructor for datasets
Expand Down Expand Up @@ -1303,18 +1496,9 @@ function Base.setindex!(dset::Dataset, X::Array{T}, I::IndexType...) where T
end

filetype = datatype(dset)
memtype = Datatype(API.h5t_get_native_type(filetype)) # padded layout in memory
memtype = _memtype(filetype, eltype(X))
close(filetype)

elT = eltype(X)
if sizeof(elT) != sizeof(memtype)
error("""
Type size mismatch
sizeof($elT) = $(sizeof(elT))
sizeof($memtype) = $(sizeof(memtype))
""")
end

dspace = dataspace(dset)
stype = API.h5s_get_simple_extent_type(dspace)
stype == API.H5S_NULL && error("attempting to write to null dataspace")
Expand Down
39 changes: 39 additions & 0 deletions test/nonallocating.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
using HDF5
using Test

@testset "non-allocating methods" begin
fn = tempname()

data = rand(UInt16, 16, 16)

h5open(fn, "w") do h5f
h5f["data"] = data
end

h5open(fn, "r") do h5f
buffer = similar(h5f["data"])
copyto!(buffer, h5f["data"])
@test isequal(buffer, data)

# Consider making this a view later
v = h5f["data"][1:4, 1:4]

buffer = similar(v)
@test size(buffer) == (4,4)
copyto!(buffer, v)
@test isequal(buffer, @view(data[1:4, 1:4]))

@test size(similar(h5f["data"], Int16)) == size(h5f["data"])
@test size(similar(h5f["data"], 5,6)) == (5, 6)
@test size(similar(h5f["data"], Int16, 8,7)) == (8,7)
@test size(similar(h5f["data"], Int16, 8,7; normalize = false)) == (8,7)
@test_broken size(similar(h5f["data"], Int8, 8,7)) == (8,7)

@test size(similar(h5f["data"], (5,6))) == (5, 6)
@test size(similar(h5f["data"], Int16, (8,7))) == (8,7)
@test size(similar(h5f["data"], Int16, (8,7); normalize = false)) == (8,7)
@test size(similar(h5f["data"], Int16, 0x8,0x7; normalize = false)) == (8,7)
end

rm(fn)
end
2 changes: 2 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ include("filter.jl")
include("chunkstorage.jl")
@debug "fileio"
include("fileio.jl")
@debug "nonallocating"
include("nonallocating.jl")
@debug "filter test utils"
include("filters/FilterTestUtils.jl")

Expand Down