Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add copyto! and similar for Datasets #937

Merged
merged 9 commits into from
Jun 7, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
268 changes: 226 additions & 42 deletions src/HDF5.jl
Original file line number Diff line number Diff line change
Expand Up @@ -968,10 +968,32 @@ function Base.read(obj::DatasetOrAttribute, ::Type{String}, I...)
return val
end

"""
copyto!(output_buffer::AbstractArray{T}, obj::Union{DatasetOrAttribute}) where T

Copy [part of] a HDF5 dataset or attribute to a preallocated output buffer.
The output buffer must be convertible to a pointer and have a contiguous layout.
"""
function Base.copyto!(output_buffer::AbstractArray{T}, obj::DatasetOrAttribute, I...) where T
dtype = datatype(obj)
val = nothing
try
val = generic_read!(output_buffer, obj, dtype, T, I...)
finally
close(dtype)
end
return val
end

# Special handling for reading OPAQUE datasets and attributes into a
# caller-provided byte buffer: one column of `sizeof(filetype)` bytes per element.
function generic_read!(buf::Matrix{UInt8}, obj::DatasetOrAttribute, filetype::Datatype, ::Type{Opaque})
    generic_read(obj, filetype, Opaque, buf)
end
function generic_read(obj::DatasetOrAttribute, filetype::Datatype, ::Type{Opaque}, buf::Union{Matrix{UInt8}, Nothing} = nothing)
sz = size(obj)
buf = Matrix{UInt8}(undef, sizeof(filetype), prod(sz))
if isnothing(buf)
buf = Matrix{UInt8}(undef, sizeof(filetype), prod(sz))
end
if obj isa Dataset
read_dataset(obj, filetype, buf, obj.xfer)
else
Expand All @@ -989,11 +1011,164 @@ function generic_read(obj::DatasetOrAttribute, filetype::Datatype, ::Type{Opaque
end

# generic read function
# In-place variant: read into a caller-provided buffer. `buf` is either an
# element-typed array or a raw byte matrix (for normalized types such as strings).
function generic_read!(buf::Union{AbstractMatrix{UInt8}, AbstractArray{T}}, obj::DatasetOrAttribute, filetype::Datatype, ::Type{T}, I...) where T
    return _generic_read(obj, filetype, T, buf, I...)
end
# Allocating variant: `_generic_read` allocates the buffer when passed `nothing`.
function generic_read(obj::DatasetOrAttribute, filetype::Datatype, ::Type{T}, I...) where T
    return _generic_read(obj, filetype, T, nothing, I...)
end
# Shared implementation behind `generic_read` / `generic_read!`.
# Reads [a hyperslab `I...` of] `obj` as element type `T`, either into the
# provided buffer `buf` or into a freshly allocated one when `buf === nothing`.
# Returns a scalar when the selection is scalar, otherwise an array.
function _generic_read(obj::DatasetOrAttribute, filetype::Datatype, ::Type{T},
                       buf::Union{AbstractMatrix{UInt8}, AbstractArray{T}, Nothing}, I...) where T

    # `dspace` is owned by this function from here on and must be closed on
    # every exit path below.
    sz, scalar, dspace = _size_of_buffer(obj, I)

    if isempty(sz)
        # Null dataspace: nothing to read.
        close(dspace)
        return EmptyArray{T}()
    end

    try
        if isnothing(buf)
            buf = _normalized_buffer(T, sz)
        else
            # Validate the caller-provided buffer by total byte count, since it
            # may be either an `Array{T}` or a raw `Matrix{UInt8}`.
            sizeof(buf) != prod(sz)*sizeof(T) &&
                error("Provided array buffer of size, $(size(buf)), and element type, $(eltype(buf)), does not match the dataset of size, $sz, and type, $T")
        end
    catch err
        close(dspace)
        rethrow(err)
    end

    memtype = _memtype(filetype, T)
    # With no index selection the memory layout matches the file dataspace;
    # otherwise describe the in-memory extent of the selection.
    memspace = isempty(I) ? dspace : dataspace(sz)

    try
        if obj isa Dataset
            API.h5d_read(obj, memtype, memspace, dspace, obj.xfer, buf)
        else
            API.h5a_read(obj, memtype, buf)
        end

        if do_normalize(T)
            # Convert the raw byte buffer into proper Julia values (strings,
            # variable-length arrays, ...). See typeconversions.jl.
            out = reshape(normalize_types(T, buf), sz...)
        else
            out = buf
        end

        # Release HDF5-allocated variable-length memory after normalization.
        xfer_id = obj isa Dataset ? obj.xfer.id : API.H5P_DEFAULT
        do_reclaim(T) && API.h5d_vlen_reclaim(memtype, memspace, xfer_id, buf)

        if scalar
            return out[1]
        else
            return out
        end

    finally
        # NOTE(review): when `isempty(I)`, `memspace === dspace`; closing twice
        # is assumed to be a safe no-op for an already-closed handle.
        close(memtype)
        close(memspace)
        close(dspace)
    end
end


"""
similar(obj::DatasetOrAttribute, [::Type{T}], [dims::Integer...]; normalize = true)

Return a `Array{T}` or `Matrix{UInt8}` to that can contain [part of] the dataset.

The `normalize` keyword will normalize the buffer for string and array datatypes.
"""
function Base.similar(
obj::DatasetOrAttribute,
::Type{T},
dims::Dims;
normalize::Bool = true
) where T
filetype = datatype(obj)
try
return similar(obj, filetype, T, dims; normalize=normalize)
finally
close(filetype)
end
end
Base.similar(
obj::DatasetOrAttribute,
::Type{T},
dims::Integer...;
normalize::Bool = true
) where T = similar(obj, T, Int.(dims); normalize=normalize)

# Base.similar without specifying the Julia type
function Base.similar(obj::DatasetOrAttribute, dims::Dims; normalize::Bool = true)
filetype = datatype(obj)
try
T = get_jl_type(filetype)
return similar(obj, filetype, T, dims; normalize=normalize)
finally
close(filetype)
end
end
Base.similar(
obj::DatasetOrAttribute,
dims::Integer...;
normalize::Bool = true
) = similar(obj, Int.(dims); normalize=normalize)

# Opaque types
function Base.similar(obj::DatasetOrAttribute, filetype::Datatype, ::Type{Opaque}; normalize::Bool = true)
# normalize keyword for consistency, but it is ignored for Opaque
sz = size(obj)
return Matrix{UInt8}(undef, sizeof(filetype), prod(sz))
end

# Undocumented Base.similar signature allowing filetype to be specified
function Base.similar(
obj::DatasetOrAttribute,
filetype::Datatype,
::Type{T},
dims::Dims;
normalize::Bool = true
) where T
# We are reusing code that expect indices
I = Base.OneTo.(dims)
sz, scalar, dspace = _size_of_buffer(obj, I)
memtype = _memtype(filetype, T)
try
buf = _normalized_buffer(T, sz)

if normalize && do_normalize(T)
buf = reshape(normalize_types(T, buf), sz)
end

return buf
finally
close(dspace)
close(memtype)
end
end
Base.similar(
obj::DatasetOrAttribute,
filetype::Datatype,
::Type{T},
dims::Integer...;
normalize::Bool = true
) where T = similar(obj, filetype, T, Int.(dims); normalize=normalize)

# Utilities used in Base.similar implementation

#=
_memtype(filetype::Datatype, T)

This is a utility function originally factored out of generic_read.
It gets the native memory type for the system based on filetype, and checks
if the size matches.
=#
@inline function _memtype(filetype::Datatype, ::Type{T}) where T
!isconcretetype(T) && error("type $T is not concrete")
!isempty(I) && obj isa Attribute && error("HDF5 attributes do not support hyperslab selections")

memtype = Datatype(API.h5t_get_native_type(filetype)) # padded layout in memory
# padded layout in memory
memtype = Datatype(API.h5t_get_native_type(filetype))

if sizeof(T) != sizeof(memtype)
error("""
Expand All @@ -1003,11 +1178,37 @@ function generic_read(obj::DatasetOrAttribute, filetype::Datatype, ::Type{T}, I.
""")
end

dspace = dataspace(obj)
return memtype
end

#=
_size_of_buffer(obj::DatasetOrAttribute, [I::Tuple, dspace::Dataspace])

This is a utility function originally from generic_read, but factored out.
The primary purpose is to determine the size and shape of the buffer to
create in order to hold the contents of a Dataset or Attribute.

# Arguments
* obj - A Dataset or Attribute
* I - (optional) indices, defaults to ()
* dspace - (optional) dataspace, defaults to dataspace(obj).
This argument will be consumed by hyperslab and returned.

# Returns
* `sz` the size of the selection
* `scalar`, which is true if the value should be read as a scalar.
* `dspace`, the dataspace, possibly replaced by a hyperslab selection derived from `I`.
=#
@inline function _size_of_buffer(
obj::DatasetOrAttribute,
I::Tuple = (),
dspace::Dataspace = dataspace(obj)
)
!isempty(I) && obj isa Attribute && error("HDF5 attributes do not support hyperslab selections")

stype = API.h5s_get_simple_extent_type(dspace)
stype == API.H5S_NULL && return EmptyArray{T}()

if !isempty(I)
if !isempty(I) && stype != API.H5S_NULL
indices = Base.to_indices(obj, I)
dspace = hyperslab(dspace, indices...)
end
Expand All @@ -1016,16 +1217,32 @@ function generic_read(obj::DatasetOrAttribute, filetype::Datatype, ::Type{T}, I.
if stype == API.H5S_SCALAR
sz = (1,)
scalar = true
elseif stype == API.H5S_NULL
sz = ()
# scalar = false
elseif isempty(I)
sz = size(dspace)
# scalar = false
else
# Determine the size by the length of non-Int indices
sz = map(length, filter(i -> !isa(i, Int), indices))
if isempty(sz)
# All indices are Int, so this is scalar
sz = (1,)
scalar = true
end
end

return sz, scalar, dspace
end

#=
_normalized_buffer(T, sz)

Return a Matrix{UInt8} for a normalized type or `Array{T}` for a regular type.
See `do_normalize` in typeconversions.jl.
=#
@inline function _normalized_buffer(::Type{T}, sz::NTuple{N, Int}) where {T, N}
if do_normalize(T)
# The entire dataset is read into in a buffer matrix where the first dimension at
# any stage of normalization is the bytes for a single element of type `T`, and
Expand All @@ -1034,32 +1251,8 @@ function generic_read(obj::DatasetOrAttribute, filetype::Datatype, ::Type{T}, I.
else
buf = Array{T}(undef, sz...)
end
memspace = isempty(I) ? dspace : dataspace(sz)

if obj isa Dataset
API.h5d_read(obj, memtype, memspace, dspace, obj.xfer, buf)
else
API.h5a_read(obj, memtype, buf)
end

if do_normalize(T)
out = reshape(normalize_types(T, buf), sz...)
else
out = buf
end

xfer_id = obj isa Dataset ? obj.xfer.id : API.H5P_DEFAULT
do_reclaim(T) && API.h5d_vlen_reclaim(memtype, memspace, xfer_id, buf)

close(memtype)
close(memspace)
close(dspace)

if scalar
return out[1]
else
return out
end
return buf
end

# Array constructor for datasets
Expand Down Expand Up @@ -1303,18 +1496,9 @@ function Base.setindex!(dset::Dataset, X::Array{T}, I::IndexType...) where T
end

filetype = datatype(dset)
memtype = Datatype(API.h5t_get_native_type(filetype)) # padded layout in memory
memtype = _memtype(filetype, eltype(X))
close(filetype)

elT = eltype(X)
if sizeof(elT) != sizeof(memtype)
error("""
Type size mismatch
sizeof($elT) = $(sizeof(elT))
sizeof($memtype) = $(sizeof(memtype))
""")
end

dspace = dataspace(dset)
stype = API.h5s_get_simple_extent_type(dspace)
stype == API.H5S_NULL && error("attempting to write to null dataspace")
Expand Down
39 changes: 39 additions & 0 deletions test/nonallocating.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
using HDF5
using Test

@testset "non-allocating methods" begin
fn = tempname()

data = rand(UInt16, 16, 16)

h5open(fn, "w") do h5f
h5f["data"] = data
end

h5open(fn, "r") do h5f
buffer = similar(h5f["data"])
copyto!(buffer, h5f["data"])
@test isequal(buffer, data)

# Consider making this a view later
v = h5f["data"][1:4, 1:4]

buffer = similar(v)
@test size(buffer) == (4,4)
copyto!(buffer, v)
@test isequal(buffer, @view(data[1:4, 1:4]))

@test size(similar(h5f["data"], Int16)) == size(h5f["data"])
@test size(similar(h5f["data"], 5,6)) == (5, 6)
@test size(similar(h5f["data"], Int16, 8,7)) == (8,7)
@test size(similar(h5f["data"], Int16, 8,7; normalize = false)) == (8,7)
@test_broken size(similar(h5f["data"], Int8, 8,7)) == (8,7)

@test size(similar(h5f["data"], (5,6))) == (5, 6)
@test size(similar(h5f["data"], Int16, (8,7))) == (8,7)
@test size(similar(h5f["data"], Int16, (8,7); normalize = false)) == (8,7)
@test size(similar(h5f["data"], Int16, 0x8,0x7; normalize = false)) == (8,7)
end

rm(fn)
end
2 changes: 2 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ include("filter.jl")
include("chunkstorage.jl")
@debug "fileio"
include("fileio.jl")
@debug "nonallocating"
include("nonallocating.jl")
@debug "filter test utils"
include("filters/FilterTestUtils.jl")

Expand Down