Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Define AttributeDict #948

Merged
merged 7 commits into from
Jun 9, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions docs/src/api_bindings.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,9 @@ h5_set_free_list_limits
- [`h5a_get_type`](@ref h5a_get_type)
- [`h5a_iterate`](@ref h5a_iterate)
- [`h5a_open`](@ref h5a_open)
- [`h5a_open_by_idx`](@ref h5a_open_by_idx)
- [`h5a_read`](@ref h5a_read)
- [`h5a_rename`](@ref h5a_rename)
- [`h5a_write`](@ref h5a_write)
```@docs
h5a_close
Expand All @@ -78,7 +80,9 @@ h5a_get_space
h5a_get_type
h5a_iterate
h5a_open
h5a_open_by_idx
h5a_read
h5a_rename
h5a_write
```

Expand Down
6 changes: 5 additions & 1 deletion docs/src/attributes.md
Original file line number Diff line number Diff line change
@@ -1,18 +1,22 @@
# Attributes

## Dictionary interface

```@docs
HDF5.Attribute
attrs
attributes
```

## Mid-level Interface

```@docs
HDF5.Attribute
open_attribute
create_attribute
read_attribute
write_attribute
delete_attribute
rename_attribute
```

## Convenience interface
Expand Down
2 changes: 2 additions & 0 deletions gen/api_defs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,9 @@
@bind h5a_get_type(attr_id::hid_t)::hid_t "Error getting attribute type"
@bind h5a_iterate2(obj_id::hid_t, idx_type::Cint, order::Cint, n::Ptr{hsize_t}, op::Ptr{Cvoid}, op_data::Any)::herr_t string("Error iterating attributes in object ", h5i_get_name(obj_id))
@bind h5a_open(obj_id::hid_t, attr_name::Ptr{UInt8}, aapl_id::hid_t)::hid_t string("Error opening attribute ", attr_name, " for object ", h5i_get_name(obj_id))
@bind h5a_open_by_idx(obj_id::hid_t, pathname::Ptr{UInt8}, idx_type::Cint, order::Cint, n::hsize_t, aapl_id::hid_t, lapl_id::hid_t)::hid_t string("Error opening attribute ", n, " of ", h5i_get_name(obj_id), "/", pathname)
@bind h5a_read(attr_id::hid_t, mem_type_id::hid_t, buf::Ptr{Cvoid})::herr_t string("Error reading attribute ", h5a_get_name(attr_id))
@bind h5a_rename(loc_id::hid_t, old_attr_name::Cstring, new_attr_name::Cstring)::herr_t string("Could not rename attribute")
@bind h5a_write(attr_hid::hid_t, mem_type_id::hid_t, buf::Ptr{Cvoid})::herr_t "Error writing attribute data"

###
Expand Down
2 changes: 1 addition & 1 deletion src/HDF5.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import Mmap
export
@read, @write,
h5open, h5read, h5write, h5rewrite, h5writeattr, h5readattr,
create_attribute, open_attribute, read_attribute, write_attribute, delete_attribute, attributes,
create_attribute, open_attribute, read_attribute, write_attribute, delete_attribute, attributes, attrs,
create_dataset, open_dataset, read_dataset, write_dataset,
create_group, open_group,
copy_object, open_object, delete_object, move_link,
Expand Down
22 changes: 22 additions & 0 deletions src/api/functions.jl
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,17 @@ function h5a_open(obj_id, attr_name, aapl_id)
return var"#status#"
end

"""
h5a_open_by_idx(obj_id::hid_t, pathname::Ptr{UInt8}, idx_type::Cint, order::Cint, n::hsize_t, aapl_id::hid_t, lapl_id::hid_t) -> hid_t

See `libhdf5` documentation for [`H5Aopen_by_idx`](https://portal.hdfgroup.org/display/HDF5/H5A_OPEN_BY_IDX).
"""
function h5a_open_by_idx(obj_id, pathname, idx_type, order, n, aapl_id, lapl_id)
var"#status#" = ccall((:H5Aopen_by_idx, libhdf5), hid_t, (hid_t, Ptr{UInt8}, Cint, Cint, hsize_t, hid_t, hid_t), obj_id, pathname, idx_type, order, n, aapl_id, lapl_id)
var"#status#" < 0 && @h5error(string("Error opening attribute ", n, " of ", h5i_get_name(obj_id), "/", pathname))
return var"#status#"
end

"""
h5a_read(attr_id::hid_t, mem_type_id::hid_t, buf::Ptr{Cvoid})

Expand All @@ -274,6 +285,17 @@ function h5a_read(attr_id, mem_type_id, buf)
return nothing
end

"""
h5a_rename(loc_id::hid_t, old_attr_name::Cstring, new_attr_name::Cstring)

See `libhdf5` documentation for [`H5Arename`](https://portal.hdfgroup.org/display/HDF5/H5A_RENAME).
"""
function h5a_rename(loc_id, old_attr_name, new_attr_name)
var"#status#" = ccall((:H5Arename, libhdf5), herr_t, (hid_t, Cstring, Cstring), loc_id, old_attr_name, new_attr_name)
var"#status#" < 0 && @h5error(string("Could not rename attribute"))
return nothing
end

"""
h5a_write(attr_hid::hid_t, mem_type_id::hid_t, buf::Ptr{Cvoid})

Expand Down
117 changes: 100 additions & 17 deletions src/attributes.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# mid-level API

"""
HDF5.Attribute

Expand Down Expand Up @@ -77,7 +79,6 @@ Open the [`Attribute`](@ref) named `name` on the object `parent`.
open_attribute(parent::Union{File,Object}, name::AbstractString, aapl::AttributeAccessProperties=AttributeAccessProperties()) =
Attribute(API.h5a_open(checkvalid(parent), name, aapl), file(parent))


"""
create_attribute(parent::Union{File,Object}, name::AbstractString, dtype::Datatype, space::Dataspace)
create_attribute(parent::Union{File,Object}, name::AbstractString, data)
Expand All @@ -102,44 +103,52 @@ function create_attribute(parent::Union{File,Object}, name::AbstractString, dtyp
return Attribute(attrid, file(parent))
end


# generic method
write_attribute(attr::Attribute, memtype::Datatype, x) = API.h5a_write(attr, memtype, x)
# specific methods
function write_attribute(attr::Attribute, memtype::Datatype, str::AbstractString)
strbuf = Base.cconvert(Cstring, str)
GC.@preserve strbuf begin
buf = Base.unsafe_convert(Ptr{UInt8}, strbuf)
API.h5a_write(attr, memtype, buf)
write_attribute(attr, memtype, buf)
end
end
function write_attribute(attr::Attribute, memtype::Datatype, x::T) where {T<:Union{ScalarType,Complex{<:ScalarType}}}
tmp = Ref{T}(x)
API.h5a_write(attr, memtype, tmp)
write_attribute(attr, memtype, tmp)
end
function write_attribute(attr::Attribute, memtype::Datatype, strs::Array{<:AbstractString})
p = Ref{Cstring}(strs)
API.h5a_write(attr, memtype, p)
write_attribute(attr, memtype, p)
end
write_attribute(attr::Attribute, memtype::Datatype, ::EmptyArray) = nothing
write_attribute(attr::Attribute, memtype::Datatype, x) = API.h5a_write(attr, memtype, x)

"""
write_attribute(parent::Union{File,Object}, name::AbstractString, data)

Write `data` as an [`Attribute`](@ref) named `name` on the object `parent`.
"""
function write_attribute(parent::Union{File,Object}, name::AbstractString, data; pv...)
obj, dtype = create_attribute(parent, name, data; pv...)
attr, dtype = create_attribute(parent, name, data; pv...)
try
write_attribute(obj, dtype, data)
write_attribute(attr, dtype, data)
catch exc
delete_attribute(parent, name)
rethrow(exc)
finally
close(obj)
close(attr)
close(dtype)
end
nothing
end

"""
rename_attribute(parent::Union{File,Object}, oldname::AbstractString, newname::AbstractString)

Rename the [`Attribute`](@ref) of the object `parent` named `oldname` to `newname`.
"""
rename_attribute(parent::Union{File,Object}, oldname::AbstractString, newname::AbstractString) =
API.h5a_rename(checkvalid(parent), oldname, newname)

"""
delete_attribute(parent::Union{File,Object}, name::AbstractString)
Expand All @@ -158,10 +167,7 @@ function h5writeattr(filename, name::AbstractString, data::Dict)
file = h5open(filename, "r+")
try
obj = file[name]
attrs = attributes(obj)
for x in keys(data)
attrs[x] = data[x]
end
merge!(attrs(obj), data)
close(obj)
finally
close(file)
Expand All @@ -178,8 +184,7 @@ function h5readattr(filename, name::AbstractString)
file = h5open(filename,"r")
try
obj = file[name]
a = attributes(obj)
dat = Dict(x => read(a[x]) for x in keys(a))
dat = Dict(attrs(obj))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
dat = Dict(attrs(obj))
dat = Dict(AttributeDict(obj))

Internally, we should just use the constructor.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there any difference?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Currently, there is one less level of indirection. For no difference we need const attrs = AttributeDict.

close(obj)
finally
close(file)
Expand All @@ -188,6 +193,85 @@ function h5readattr(filename, name::AbstractString)
end


struct AttributeDict <: AbstractDict{String,Any}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's consider parameterizing this so that we can have a more specific type than Any, which can be the default is not specified.

Copy link
Collaborator Author

@simonbyrne simonbyrne Jun 6, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't see how that could be made to work, or really what advantage it would give. Can you expand on what you had in mind? What would happen if you attempted to read or write an attribute of a different type?

My intention was to provide a simple, easy-to-use interface. I think if you want to specify the return type, it would be better to do in the mid-level interface, e.g. adding a return type to read_attribute.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps AttributeDict{T} could use convert(T, ...)? If convert throws an error, then so be it.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It adds a lot of complexity and potential brittleness, and I'm still skeptical as to why it would be helpful? The files I've seen with attributes use a mix of different types (typically strings, integers and vectors of integers).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here's my prototype diff:
JaneliaSciComp@c3a5db3
I added a try to Base.iterate to skip over items where convert fails.

Type stability can be helpful. Since I'm accessing the HDF5 file in Julia rather than Python, I would like to take advantage of the type system. In many cases, I may have prior knowledge of the type I'm expecting the attribute to be, or I can glean that from the API. This provides an easy way to enforce knowledge of the type, allowing subsequent code to be type stable without having to use a completely different API. It also throws an error if my assumptions are violated.

Having the parameter default to Any produces the exact same results you have now. Allowing for a parameter allows the same API to be extended to encourage type stability.

julia> attrs(h5f)
AttributeDict of HDF5.Group: / (file: test.h5) with 4 attributes:
  "bye"     => "3.0"
  "hello"   => 5
  "someint" => 3
  "test"    => 5.5

julia> attrs(h5f, Int)
AttributeDict of HDF5.Group: / (file: test.h5) with 4 attributes:
  "hello"   => 5
  "someint" => 3

julia> attrs(h5f, String)
AttributeDict of HDF5.Group: / (file: test.h5) with 4 attributes:
  "bye" => "3.0"

julia> attrs(h5f, Float64)
AttributeDict of HDF5.Group: / (file: test.h5) with 4 attributes:
  "hello"   => 5.0
  "someint" => 3.0
  "test"    => 5.5

julia> attrs(h5f, Int)["test"]
ERROR: InexactError: Int64(5.5)
Stacktrace:
 [1] Int64
   @ ./float.jl:812 [inlined]
 [2] convert(#unused#::Type{Int64}, x::Float64)
   @ Base ./number.jl:7
 [3] getindex(x::HDF5.AttributeDict{Int64}, name::String)
   @ HDF5 ~/.julia/dev/HDF5/src/attributes.jl:224
 [4] top-level scope
   @ REPL[12]:1

There a few things to explore here, such as passing the parameter as the memtype to read_attribute, but I think this would allow this API to go a lot further. Since we are talking about parameterizing Dataset as well, I think adding a type parameter to AttributeDict would be consistent with future direction of this package.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since we are talking about parameterizing Dataset as well, I think adding a type parameter to AttributeDict would be consistent with future direction of this package.

The analogy to Dataset would be adding a type parameter to Attribute though?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Dataset would be struct Dataset{T,N} <: AbstractArray{T,N} or struct Dataset{T,N} <: AbstractDiskArray{T,N}.

I wanted to explore if Attribute could become Attribute{T} and have a corresponding struct Attributes{T2} <: AbstractDict{String, Attribute{<:T2}}. However, you want to deprecate that interface, which is why I'm exploring the idea here.

I would be fine if we parameterized the struct but only implemented methods for AttributeDict{Any} in this pull request.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I still don't see the benefit of adding a parameter to AttributeDict: how would it help?

parent::Object
mkitti marked this conversation as resolved.
Show resolved Hide resolved
end

"""
attrs(object::Union{File,Group,Dataset,Datatype})

The attributes dictionary of `object`. Returns an `AttributeDict`, a `Dict`-like
object for accessing the attributes of `object`.

```julia
attrs(object)["name"] = value # create/overwrite an attribute
attr = attrs(object)["name"] # read an attribute
delete!(attrs(object), "name") # delete an attribute
keys(attrs(object)) # list the attribute names
```
"""
AttributeDict(file::File) = AttributeDict(open_group(file, "."))

function attrs(parent)
return AttributeDict(parent)
end

Base.haskey(attrdict::AttributeDict, path::AbstractString) = API.h5a_exists(checkvalid(attrdict.parent), path)
Base.length(attrdict::AttributeDict) = Int(object_info(attrdict.parent).num_attrs)
mkitti marked this conversation as resolved.
Show resolved Hide resolved

function Base.getindex(x::AttributeDict, name::AbstractString)
haskey(x, name) || throw(KeyError(name))
read_attribute(x.parent, name)
end
function Base.get(x::AttributeDict, name::AbstractString, default)
haskey(x, name) || return default
read_attribute(x.parent, name)
end
function Base.setindex!(attrdict::AttributeDict, val, name::AbstractString)
if haskey(attrdict, name)
# in case of an error, we write first to a temporary, then rename
_name = tempname()
try
write_attribute(attrdict.parent, _name, val)
delete_attribute(attrdict.parent, name)
rename_attribute(attrdict.parent, _name, name)
finally
haskey(attrdict, _name) && delete_attribute(attrdict.parent, _name)
end
else
write_attribute(attrdict.parent, name, val)
end
end
Base.delete!(attrdict::AttributeDict, path::AbstractString) = delete_attribute(attrdict.parent, path)

function Base.keys(attrdict::AttributeDict)
# faster than iteratively calling h5a_get_name_by_idx
checkvalid(attrdict.parent)
keyvec = sizehint!(String[], length(attrdict))
API.h5a_iterate(attrdict.parent, IDX_TYPE[], ORDER[]) do _, attr_name, _
push!(keyvec, unsafe_string(attr_name))
return false
end
return keyvec
end

function Base.iterate(attrdict::AttributeDict)
# constuct key vector, then iterate
# faster than calling h5a_open_by_idx
iterate(attrdict, (keys(attrdict), 1))
end
function Base.iterate(attrdict::AttributeDict, (keyvec, n))
iter = iterate(keyvec, n)
if isnothing(iter)
return iter
end
key, nn = iter
return (key => attrdict[key]), (keyvec, nn)
end




struct Attributes
parent::Union{File,Object}
end
Expand Down Expand Up @@ -222,8 +306,7 @@ function Base.keys(x::Attributes)
end
Base.read(attr::Attributes, name::AbstractString) = read_attribute(attr.parent, name)


# Datasets act like attributes
# Dataset methods which act like attributes
Base.write(parent::Dataset, name::AbstractString, data; pv...) = write_attribute(parent, name, data; pv...)
function Base.getindex(dset::Dataset, name::AbstractString)
haskey(dset, name) || throw(KeyError(name))
Expand Down
14 changes: 12 additions & 2 deletions src/show.jl
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,16 @@ function Base.show(io::IO, attr::Attributes)
print(io, "Attributes of ", attr.parent)
end

Base.show(io::IO, attrdict::AttributeDict) = summary(io, attrdict)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How does Julia end up printing the rest of the attributes ? Is that in some sort of fallback AbstractDict printing?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah it's the 3 argument show method. Thanks, I was wondering why @which was sending me to a seemingly unrelated call.

function Base.summary(io::IO, attrdict::AttributeDict)
print(io, "AttributeDict of ", attrdict.parent)
if isvalid(attrdict.parent)
n = length(attrdict)
print(io, " with ", n, n == 1 ? " attribute" : " attributes")
end
end


const ENDIAN_DICT = Dict(
API.H5T_ORDER_LE => "little endian byte order",
API.H5T_ORDER_BE => "big endian byte order",
Expand Down Expand Up @@ -189,9 +199,9 @@ _tree_head(io::IO, obj) = print(io, _tree_icon(obj), " ", obj)
_tree_head(io::IO, obj::Datatype) = print(io, _tree_icon(obj), " HDF5.Datatype: ", name(obj))

_tree_count(parent::Union{File,Group}, attributes::Bool) =
length(parent) + (attributes ? length(HDF5.attributes(parent)) : 0)
length(parent) + (attributes ? length(HDF5.attrs(parent)) : 0)
_tree_count(parent::Dataset, attributes::Bool) =
attributes ? length(HDF5.attributes(parent)) : 0
attributes ? length(HDF5.attrs(parent)) : 0
_tree_count(parent::Attributes, _::Bool) = length(parent)
_tree_count(parent::Union{Attribute,Datatype}, _::Bool) = 0

Expand Down
74 changes: 74 additions & 0 deletions test/attributes.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
using HDF5, Test

function test_attrs(o::Union{HDF5.File, HDF5.Object})

@test attrs(o) isa HDF5.AttributeDict

attrs(o)["a"] = 1
@test haskey(attrs(o), "a")
@test attrs(o)["a"] == 1

attrs(o)["b"] = [2,3]
@test attrs(o)["b"] == [2,3]
@test haskey(attrs(o), "a")
@test length(attrs(o)) == 2
@test sort(keys(attrs(o))) == ["a", "b"]

@test !haskey(attrs(o), "c")

# overwrite: same type
attrs(o)["a"] = 4
@test attrs(o)["a"] == 4
@test get(attrs(o), "a", nothing) == 4
@test length(attrs(o)) == 2
@test sort(keys(attrs(o))) == ["a", "b"]

# overwrite: different size
attrs(o)["b"] = [4,5,6]
@test attrs(o)["b"] == [4,5,6]
@test length(attrs(o)) == 2
@test sort(keys(attrs(o))) == ["a", "b"]

# overwrite: different type
attrs(o)["b"] = "a string"
@test attrs(o)["b"] == "a string"
@test length(attrs(o)) == 2
@test sort(keys(attrs(o))) == ["a", "b"]

# delete a key
delete!(attrs(o), "a")
@test !haskey(attrs(o), "a")
@test length(attrs(o)) == 1
@test sort(keys(attrs(o))) == ["b"]

@test_throws KeyError attrs(o)["a"]
@test isnothing(get(attrs(o), "a", nothing))

end


@testset "attrs interface" begin
filename = tempname()
f = h5open(filename, "w")

try
# Test attrs on a HDF5.File
test_attrs(f)

# Test attrs on a HDF5.Group
g = create_group(f, "group_foo")
test_attrs(g)

# Test attrs on a HDF5.Dataset
d = create_dataset(g, "dataset_bar", Int, (32, 32))
test_attrs(d)

# Test attrs on a HDF5.Datatype
t = commit_datatype(g, "datatype_int16",
HDF5.Datatype(HDF5.API.h5t_copy(HDF5.API.H5T_NATIVE_INT16))
)
test_attrs(t)
finally
close(f)
end
end
Loading