Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

better filestack performance #614

Merged
merged 25 commits into from
Mar 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ GeoInterface = "1"
HDF5 = "0.14, 0.15, 0.16"
Makie = "0.19, 0.20"
Missings = "0.4, 1"
NCDatasets = "0.13"
NCDatasets = "0.13, 0.14"
OffsetArrays = "1"
ProgressMeter = "1"
RasterDataSources = "0.5.7"
Expand Down
30 changes: 16 additions & 14 deletions ext/RastersArchGDALExt/gdal_source.jl
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,11 @@
end

RA.cleanreturn(A::AG.RasterDataset) = Array(A)
RA.haslayers(::Type{GDALsource}) = false
RA.haslayers(::GDALsource) = false
RA._sourcetrait(A::AG.RasterDataset) = GDALsource()

Check warning on line 46 in ext/RastersArchGDALExt/gdal_source.jl

View check run for this annotation

Codecov / codecov/patch

ext/RastersArchGDALExt/gdal_source.jl#L46

Added line #L46 was not covered by tests

"""
Base.write(filename::AbstractString, ::Type{GDALsource}, A::AbstractRaster; force=false, kw...)
Base.write(filename::AbstractString, ::GDALsource, A::AbstractRaster; force=false, kw...)

Write a `Raster` to file using GDAL.

Expand All @@ -58,7 +59,7 @@
Returns `filename`.
"""
function Base.write(
filename::AbstractString, ::Type{GDALsource}, A::AbstractRaster{T};
filename::AbstractString, ::GDALsource, A::AbstractRaster{T};
force=false, verbose=true, kw...
) where T
RA.check_can_write(filename, force)
Expand All @@ -72,7 +73,7 @@
return filename
end

function RA.create(filename, ::Type{GDALsource}, T::Type, dims::DD.DimTuple;
function RA.create(filename, ::GDALsource, T::Type, dims::DD.DimTuple;
missingval=nothing, metadata=nothing, name=nothing, lazy=true, verbose=true, kw...
)
T = Missings.nonmissingtype(T)
Expand All @@ -82,14 +83,14 @@
nothing
end

return Raster(filename; source=GDALsource, name, lazy, dropband=!hasdim(dims, Band))
return Raster(filename; source=GDALsource(), name, lazy, dropband=!hasdim(dims, Band))
end

function _maybe_warn_south_up(A, verbose, msg)
verbose && lookup(A, Y) isa AbstractSampled && order(A, Y) isa ForwardOrdered && @warn msg
end

function RA._open(f, ::Type{GDALsource}, filename::AbstractString; write=false, kw...)
function RA._open(f, ::GDALsource, filename::AbstractString; write=false, kw...)
# Check the file actually exists because the GDAL error is unhelpful
if !isfile(filename)
# Allow gdal virtual file systems
Expand All @@ -113,15 +114,15 @@
AG.readraster(RA.cleanreturn ∘ f, filename)
end
end
RA._open(f, ::Type{GDALsource}, ds::AG.RasterDataset; kw...) = RA.cleanreturn(f(ds))
RA._open(f, ::GDALsource, ds::AG.RasterDataset; kw...) = RA.cleanreturn(f(ds))


# DimensionalData methods for ArchGDAL types ###############################

# These methods are type piracy on DimensionalData/ArchGDAL and may have to move some day

# We allow passing in crs and mappedcrs manually
function DD.dims(raster::AG.RasterDataset, crs=nothing, mappedcrs=nothing)
function RA._dims(raster::AG.RasterDataset, crs=nothing, mappedcrs=nothing)
gt_dims = try
AG.getgeotransform(raster)
catch
Expand Down Expand Up @@ -205,15 +206,16 @@
end
end

function DD.metadata(raster::AG.RasterDataset, args...)
# TODO make metadata optional, its slow to get
function RA._metadata(raster::AG.RasterDataset, args...)
band = AG.getband(raster.ds, 1)
# color = AG.getname(AG.getcolorinterp(band))
scale = AG.getscale(band)
offset = AG.getoffset(band)
# norvw = AG.noverview(band)
units = AG.getunittype(band)
filelist = AG.filelist(raster)
metadata = RA._metadatadict(GDALsource, "scale"=>scale, "offset"=>offset)
metadata = RA._metadatadict(GDALsource(), "scale"=>scale, "offset"=>offset)
if units == ""
metadata["units"] = units
end
Expand All @@ -230,11 +232,11 @@
function RA.Raster(ds::AG.RasterDataset;
crs=crs(ds),
mappedcrs=nothing,
dims=dims(ds, crs, mappedcrs),
dims=RA._dims(ds, crs, mappedcrs),
refdims=(),
name=Symbol(""),
metadata=metadata(ds),
missingval=missingval(ds),
metadata=RA._metadata(ds),
missingval=RA.missingval(ds),
lazy=false,
dropband=false
)
Expand Down Expand Up @@ -322,7 +324,7 @@
_maybe_correct_to_write(A) = _maybe_correct_to_write(lookup(A, X()), A)
_maybe_correct_to_write(lookup, A) = A
function _maybe_correct_to_write(lookup::Union{AbstractSampled,NoLookup}, A)
RA._maybe_use_type_missingval(A, GDALsource) |> _maybe_permute_to_gdal
RA._maybe_use_type_missingval(A, GDALsource()) |> _maybe_permute_to_gdal
end

_check_driver(filename::Nothing, driver) = "MEM"
Expand Down
7 changes: 5 additions & 2 deletions ext/RastersGRIBDatasetsExt/gribdatasets_source.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,15 @@ end
# In GRIBDatasets, the file is open for reading the values and closed afterwards.
Base.close(os::RA.OpenStack{GRIBsource}) = nothing

function RA._open(f, ::Type{GRIBsource}, filename::AbstractString; write=false, kw...)
function RA._open(f, ::GRIBsource, filename::AbstractString; write=false, kw...)
isfile(filename) || _filenotfound_error(filename)
ds = GRIBDatasets.GRIBDataset(filename)
RA._open(f, GRIBsource, ds; kw...)
RA._open(f, GRIBsource(), ds; kw...)
end

# Hack to get the inner DiskArrays chunks as they are not exposed at the top level
RA._get_eachchunk(var::GDS.Variable) = DiskArrays.eachchunk(var.values)
RA._get_haschunks(var::GDS.Variable) = DiskArrays.haschunks(var.values)

RA._sourcetrait(::GDS.Variable) = GRIBsource()
RA._sourcetrait(::GDS.GRIBDataset) = GRIBsource()
21 changes: 11 additions & 10 deletions ext/RastersHDF5Ext/smap_source.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
const SMAPGEODATA = "Geophysical_Data"
const SMAPCRS = ProjString("+proj=cea +lon_0=0 +lat_ts=30 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs +ellps=WGS84 +towgs84=0,0,0")
const SMAPSIZE = (3856, 1624)
const SMAPDIMTYPES = (X, Y)
const SMAPDIMTYPES = (X(), Y())

# Dataset wrapper ###############################################################
# Becauase SMAP is just one of many HDF5 formats,
Expand All @@ -16,7 +16,7 @@
RA.layerkeys(ds::SMAPhdf5) = keys(ds)
RA.filekey(ds::SMAPhdf5, key::Nothing) = first(keys(ds))

function DD.dims(wrapper::SMAPhdf5)
function _dims(wrapper::SMAPhdf5, args...)

Check warning on line 19 in ext/RastersHDF5Ext/smap_source.jl

View check run for this annotation

Codecov / codecov/patch

ext/RastersHDF5Ext/smap_source.jl#L19

Added line #L19 was not covered by tests
dataset = parent(wrapper)
proj = read(HDF5.attributes(HDF5.root(dataset)["EASE2_global_projection"]), "grid_mapping_name")
if proj == "lambert_cylindrical_equal_area"
Expand Down Expand Up @@ -50,17 +50,18 @@
end

# TODO actually add metadata to the dict
DD.metadata(wrapper::SMAPhdf5) = RA._metadatadict(SMAPsource)
_metadata(wrapper::SMAPhdf5) = RA._metadatadict(SMAPsource())

Check warning on line 53 in ext/RastersHDF5Ext/smap_source.jl

View check run for this annotation

Codecov / codecov/patch

ext/RastersHDF5Ext/smap_source.jl#L53

Added line #L53 was not covered by tests

function DD.layerdims(ds::SMAPhdf5)
keys = RA.cleankeys(RA.layerkeys(ds))
function _layerdims(ds::SMAPhdf5; layers=nothing)
keys = map(Symbol, isnothing(layers) ? RA.cleankeys(RA.layerkeys(ds)) : layers.keys)

Check warning on line 56 in ext/RastersHDF5Ext/smap_source.jl

View check run for this annotation

Codecov / codecov/patch

ext/RastersHDF5Ext/smap_source.jl#L55-L56

Added lines #L55 - L56 were not covered by tests
# All dims are the same
NamedTuple{keys}(map(_ -> SMAPDIMTYPES, keys))
NamedTuple{Tuple(keys)}(map(_ -> SMAPDIMTYPES, keys))

Check warning on line 58 in ext/RastersHDF5Ext/smap_source.jl

View check run for this annotation

Codecov / codecov/patch

ext/RastersHDF5Ext/smap_source.jl#L58

Added line #L58 was not covered by tests
end

function DD.layermetadata(ds::SMAPhdf5)
keys = RA.cleankeys(RA.layerkeys(ds))
NamedTuple{keys}(map(_ -> DD.metadata(ds), keys))
function _layermetadata(ds::SMAPhdf5; layers=nothing)
keys = map(Symbol, isnothing(layers) ? RA.cleankeys(RA.layerkeys(ds)) : layers.keys)
md = _metadata(ds)
NamedTuple{keys}(map(_ -> md, keys))

Check warning on line 64 in ext/RastersHDF5Ext/smap_source.jl

View check run for this annotation

Codecov / codecov/patch

ext/RastersHDF5Ext/smap_source.jl#L61-L64

Added lines #L61 - L64 were not covered by tests
end

Base.keys(ds::SMAPhdf5) = RA.cleankeys(keys(parent(ds)[SMAPGEODATA]))
Expand Down Expand Up @@ -205,5 +206,5 @@

_smap_timedim(t::DateTime) = _smap_timedim(t:Hour(3):t)
function _smap_timedim(times::AbstractVector)
Ti(Sampled(times, ForwardOrdered(), Regular(Hour(3)), Intervals(Start()), RA._metadatadict(SMAPsource)))
Ti(Sampled(times, ForwardOrdered(), Regular(Hour(3)), Intervals(Start()), RA._metadatadict(SMAPsource())))

Check warning on line 209 in ext/RastersHDF5Ext/smap_source.jl

View check run for this annotation

Codecov / codecov/patch

ext/RastersHDF5Ext/smap_source.jl#L209

Added line #L209 was not covered by tests
end
15 changes: 9 additions & 6 deletions ext/RastersNCDatasetsExt/ncdatasets_source.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@ const UNNAMED_NCD_FILE_KEY = "unnamed"
const NCDAllowedType = Union{Int8,UInt8,Int16,UInt16,Int32,UInt32,Int64,UInt64,Float32,Float64,Char,String}

"""
Base.write(filename::AbstractString, ::Type{<:CDMsource}, A::AbstractRaster)
Base.write(filename::AbstractString, ::NCDsource, A::AbstractRaster)

Write an NCDarray to a NetCDF file using NCDatasets.jl

Returns `filename`.
"""
function Base.write(filename::AbstractString, ::Type{<:CDMsource}, A::AbstractRaster;
function Base.write(filename::AbstractString, ::NCDsource, A::AbstractRaster;
append=false, force=false, verbose=true, kw...
)
mode = if append
Expand All @@ -33,7 +33,7 @@ end
# Stack ########################################################################

"""
Base.write(filename::AbstractString, ::Type{NCDsource}, s::AbstractRasterStack; kw...)
Base.write(filename::AbstractString, ::NCDsource, s::AbstractRasterStack; kw...)

Write an NCDstack to a single netcdf file, using NCDatasets.jl.

Expand Down Expand Up @@ -61,7 +61,7 @@ Keywords are passed to `NCDatasets.defVar`.
- `typename` (string): The name of the NetCDF type required for vlen arrays
(https://web.archive.org/save/https://www.unidata.ucar.edu/software/netcdf/netcdf-4/newdocs/netcdf-c/nc_005fdef_005fvlen.html)
"""
function Base.write(filename::AbstractString, ::Type{<:CDMsource}, s::AbstractRasterStack; append = false, kw...)
function Base.write(filename::AbstractString, ::NCDsource, s::AbstractRasterStack; append = false, kw...)
mode = !isfile(filename) || !append ? "c" : "a";
ds = NCD.Dataset(filename, mode; attrib=RA._attribdict(metadata(s)))
try
Expand All @@ -77,11 +77,11 @@ function RA.OpenStack(fs::RA.FileStack{NCDsource,K}) where K
end
Base.close(os::RA.OpenStack{NCDsource}) = NCD.close(RA.dataset(os))

function RA._open(f, ::Type{NCDsource}, filename::AbstractString; write=false, kw...)
function RA._open(f, ::NCDsource, filename::AbstractString; write=false, kw...)
isfile(filename) || RA._isurl(filename) || RA._filenotfound_error(filename)
mode = write ? "a" : "r"
NCD.Dataset(filename, mode) do ds
RA._open(f, NCDsource, ds; kw...)
RA._open(f, NCDsource(), ds; kw...)
end
end

Expand Down Expand Up @@ -151,6 +151,9 @@ end
RA._get_eachchunk(var::NCD.Variable) = DiskArrays.eachchunk(var)
RA._get_haschunks(var::NCD.Variable) = DiskArrays.haschunks(var)

RA._sourcetrait(::NCD.Dataset) = NCDsource()
RA._sourcetrait(::NCD.Variable) = NCDsource()

# precompilation

# const _NCDVar = NCDatasets.CFVariable{Union{Missing, Float32}, 3, NCDatasets.Variable{Float32, 3, NCDatasets.NCDataset}, NCDatasets.Attributes{NCDatasets.NCDataset{Nothing}}, NamedTuple{(:fillvalue, :scale_factor, :add_offset, :calendar, :time_origin, :time_factor), Tuple{Float32, Nothing, Nothing, Nothing, Nothing, Nothing}}}
Expand Down
2 changes: 1 addition & 1 deletion src/Rasters.jl
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ const DEFAULT_TABLE_DIM_KEYS = (:X, :Y)

include("lookup.jl")
include("dimensions.jl")
include("sources/sources.jl")
include("filearray.jl")
include("filestack.jl")
include("openstack.jl")
Expand All @@ -102,7 +103,6 @@ include("crs.jl")
const RasterStackOrArray = Union{AbstractRasterStack,AbstractRaster}
const RasterSeriesOrStack = Union{AbstractRasterSeries,AbstractRasterStack}

include("sources/sources.jl")
include("utils.jl")
include("skipmissing.jl")

Expand Down
Loading
Loading