Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We鈥檒l occasionally send you account related emails.

Already on GitHub? Sign in to your account

better filestack performance #614

Merged
merged 25 commits into from
Mar 24, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
_sourcetype => _sourcetrait
  • Loading branch information
rafaqz committed Mar 24, 2024
commit 52025373f4c4e51881f9db8817f02650f29eeaca
2 changes: 1 addition & 1 deletion ext/RastersArchGDALExt/gdal_source.jl
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ end

RA.cleanreturn(A::AG.RasterDataset) = Array(A)
RA.haslayers(::GDALsource) = false
RA._sourcetype(A::AG.RasterDataset) = GDALsource()
RA._sourcetrait(A::AG.RasterDataset) = GDALsource()

"""
Base.write(filename::AbstractString, ::GDALsource, A::AbstractRaster; force=false, kw...)
Expand Down
15 changes: 9 additions & 6 deletions ext/RastersHDF5Ext/smap_source.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ const SMAPMISSING = -9999.0f0
const SMAPGEODATA = "Geophysical_Data"
const SMAPCRS = ProjString("+proj=cea +lon_0=0 +lat_ts=30 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs +ellps=WGS84 +towgs84=0,0,0")
const SMAPSIZE = (3856, 1624)
const SMAPDIMTYPES = (X, Y)
const SMAPDIMTYPES = (X(), Y())

# Dataset wrapper ###############################################################
# Becauase SMAP is just one of many HDF5 formats,
Expand All @@ -16,7 +16,7 @@ RA.missingval(ds::SMAPhdf5) = SMAPMISSING
RA.layerkeys(ds::SMAPhdf5) = keys(ds)
RA.filekey(ds::SMAPhdf5, key::Nothing) = first(keys(ds))

function _dims(wrapper::SMAPhdf5)
function _dims(wrapper::SMAPhdf5, args...)
dataset = parent(wrapper)
proj = read(HDF5.attributes(HDF5.root(dataset)["EASE2_global_projection"]), "grid_mapping_name")
if proj == "lambert_cylindrical_equal_area"
Expand Down Expand Up @@ -52,13 +52,16 @@ end
# TODO actually add metadata to the dict
_metadata(wrapper::SMAPhdf5) = RA._metadatadict(SMAPsource())

function _layerdims(ds::SMAPhdf5; keys=RA.cleankeys(RA.layerkeys(ds)))
function _layerdims(ds::SMAPhdf5; layers=nothing)
keys = map(Symbol, isnothing(layers) ? RA.cleankeys(RA.layerkeys(ds)) : layers.keys)
# All dims are the same
NamedTuple{keys}(map(_ -> SMAPDIMTYPES, keys))
NamedTuple{Tuple(keys)}(map(_ -> SMAPDIMTYPES, keys))
end

function _layermetadata(ds::SMAPhdf5; keys=RA.cleankeys(RA.layerkeys(ds)))
NamedTuple{keys}(map(_ -> _metadata(ds), keys))
function _layermetadata(ds::SMAPhdf5; layers=nothing)
keys = map(Symbol, isnothing(layers) ? RA.cleankeys(RA.layerkeys(ds)) : layers.keys)
md = _metadata(ds)
NamedTuple{keys}(map(_ -> md, keys))
end

Base.keys(ds::SMAPhdf5) = RA.cleankeys(keys(parent(ds)[SMAPGEODATA]))
Expand Down
15 changes: 8 additions & 7 deletions src/array.jl
Original file line number Diff line number Diff line change
Expand Up @@ -268,14 +268,15 @@ function Raster(ds, filename::AbstractString, key=nothing;
source = _sourcetype(filename, source)
crs = defaultcrs(source, crs)
mappedcrs = defaultmappedcrs(source, mappedcrs)
dims = dims isa Nothing ? _dims(ds, crs, mappedcrs) : dims
data = if lazy
FileArray{typeof(source)}(ds, filename; key, write)
else
_open(source, ds; key) do A
_checkmem(A)
Array(A)
data, dims = _open(source, ds; key) do var
dims1 = dims isa Nothing ? _dims(var, crs, mappedcrs) : dims
data = if lazy
FileArray{typeof(source)}(var, filename; key, write)
else
_checkmem(var)
Array(var)
end
data, dims1
end
raster = Raster(data, dims, refdims, name, metadata, missingval)
return dropband ? _drop_single_band(raster, lazy) : raster
Expand Down
35 changes: 17 additions & 18 deletions src/sources/commondatamodel.jl
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ missingval(A::CFDiskArray) = missingval(parent(A))

# DimensionalData methods
_dims(var::CFDiskArray, args...) = _dims(parent(var), args...)
_layerdims(var::CFDiskArray, args...) = _layerdims(parent(var), args...)
_metadata(var::CFDiskArray, args...) = _metadata(parent(var), args...)

# Base methods
Expand Down Expand Up @@ -210,33 +209,33 @@ function _layers(ds::AbstractDataset, keys)
(; keys, vars, attrs)
end

function _dims(var::AbstractVariable, crs=nothing, mappedcrs=nothing)
map(CDM.dimnames(var)) do key
_cdmdim(CDM.dataset(var), key, crs, mappedcrs)
end |> Tuple
end
function _layerdims(var::AbstractVariable)
map(CDM.dimnames(var)) do dimname
_cdmdim(CDM.dataset(var), dimname)
end |> Tuple
function _dims(var::AbstractVariable{<:Any,N}, crs=nothing, mappedcrs=nothing) where N
dimnames = CDM.dimnames(var)
ntuple(Val(N)) do i
_cdmdim(CDM.dataset(var), dimnames[i], crs, mappedcrs)
end
end
_metadata(var::AbstractVariable; attr=CDM.attribs(var)) =
_metadatadict(_sourcetype(var), attr)

function _dims(ds::AbstractDataset, crs=nothing, mappedcrs=nothing)
map(CDM.dimnames(ds)) do key
_cdmdim(ds, key, crs, mappedcrs)
function _dimdict(ds::AbstractDataset, crs=nothing, mappedcrs=nothing)
dimdict = Dict{String,Dimension}()
for dimname in CDM.dimnames(ds)
dimdict[dimname] = _cdmdim(ds, dimname, crs, mappedcrs)
end
return dimdict
end
function _dims(ds::AbstractDataset, dimdict::Dict)
map(CDM.dimnames(ds)) do dimname
dimdict[dimname]
end |> Tuple
end
_metadata(ds::AbstractDataset; attr=CDM.attribs(ds)) =
_metadatadict(_sourcetype(ds), attr)
function _layerdims(ds::AbstractDataset; layers)
dimdict = map(CDM.dimnames(ds)) do dimname
dimname => _cdmdimtype(ds, dimname)
end |> Dict
function _layerdims(ds::AbstractDataset; layers, dimdict)
map(layers.vars) do var
map(CDM.dimnames(var)) do dimname
dimdict[dimname]
basedims(dimdict[dimname])
end |> Tuple
end
end
Expand Down
8 changes: 5 additions & 3 deletions src/stack.jl
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ function RasterStack(filenames::NamedTuple{K,<:Tuple{<:AbstractString,Vararg}};
dims = _dims(ds, crs, mappedcrs)
prod(map(length, dims))
data = if lazy
FileArray{source}(ds, fn; key)
FileArray{typeof(source)}(ds, fn; key)
else
_open(source, ds; key) do A
_checkmem(A)
Expand Down Expand Up @@ -290,10 +290,12 @@ function _layer_stack(filename;
mappedcrs = defaultmappedcrs(source, mappedcrs)
data, field_kw = _open(filename; source) do ds
layers = _layers(ds, keys)
dims = dims isa Nothing ? _dims(ds, crs, mappedcrs) : dims
# Create a Dict of dimkey => Dimension to use in `dim` and `layerdims`
dimdict = _dimdict(ds, crs, mappedcrs)
dims = dims isa Nothing ? _dims(ds, dimdict) : dims
refdims = refdims == () || refdims isa Nothing ? () : refdims
metadata = metadata isa Nothing ? _metadata(ds) : metadata
layerdims = layerdims isa Nothing ? _layerdims(ds; layers) : layerdims
layerdims = layerdims isa Nothing ? _layerdims(ds; layers, dimdict) : layerdims
layermetadata = layermetadata isa Nothing ? _layermetadata(ds; layers) : layermetadata
missingval = missingval isa Nothing ? Rasters.missingval(ds) : missingval
tuplekeys = Tuple(map(Symbol, layers.keys))
Expand Down
21 changes: 10 additions & 11 deletions test/sources/gribdatasets.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
using Rasters, Test, GRIBDatasets
using Rasters: FileArray, GRIBsource
using Rasters: FileArray, FileStack, GRIBsource
using Rasters.Lookups, Rasters.Dimensions
using Statistics
using Dates
Expand Down Expand Up @@ -28,14 +28,18 @@ era5 = joinpath(gribexamples_dir, "era5-levels-members.grib")
@testset "Raster" begin
@time gribarray = Raster(era5)
@time lazyarray = Raster(era5; lazy=true)
@time lazystack = RasterStack(era5; lazy=true)
@time lazystack = RasterStack(era5; lazy=true);
@time eagerstack = RasterStack(era5; lazy=false)
@time ds = GRIBDataset(era5);

@testset "lazyness" begin
@time Raster(era5);
@test parent(gribarray) isa Array
@test parent(lazyarray) isa FileArray
@test parent(eagerstack) isa NamedTuple
@test parent(lazystack) isa FileStack
@test parent(eagerstack[:t]) isa Array
@test parent(lazystack[:t]) isa FileArray
end

@testset "read" begin
Expand All @@ -55,23 +59,18 @@ era5 = joinpath(gribexamples_dir, "era5-levels-members.grib")
stack = RasterStack(era5; lazy = true)
ds = GRIBDataset(era5)

diff = stack[:z][:,:,1,1,1] - ds["z"][:,:,1,1,1]
diff = stack[:z][Z=1, Ti=1, number=1] - ds["z"][:,:,1,1,1]

@test all(diff .== 0.)
end

@testset "eager stack" begin
t = eagerstack[:t]
@test t[:,:,2,3,1] isa AbstractMatrix
end

@testset "array properties" begin
dsvar = ds["z"]
@test size(gribarray) == size(dsvar)
@test gribarray isa Raster
@test index(gribarray, Ti) == DateTime(2017, 1, 1):Hour(12):DateTime(2017, 1, 2, 12)
@test index(gribarray, Y) == 90.0:-3.0:-90.0
@test index(gribarray, X) == 0.0:3.0:357.0
@test lookup(gribarray, Ti) == DateTime(2017, 1, 1):Hour(12):DateTime(2017, 1, 2, 12)
@test lookup(gribarray, Y) == 90.0:-3.0:-90.0
@test lookup(gribarray, X) == 0.0:3.0:357.0
end

@testset "dimensions" begin
Expand Down
4 changes: 2 additions & 2 deletions test/sources/ncdatasets.jl
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@ end
@testset "Raster" begin
@time ncarray = Raster(ncsingle);

@time lazyarray = Raster(ncsingle; lazy=true);
@time eagerarray = Raster(ncsingle; lazy=false);
@time lazyarray = Raster(ncsingle; lazy=true)
@time eagerarray = Raster(ncsingle; lazy=false)
@test_throws ArgumentError Raster("notafile.nc")

@testset "lazyness" begin
Expand Down