-
Notifications
You must be signed in to change notification settings - Fork 0
/
grib_to_arrow.cpp
249 lines (222 loc) · 14.3 KB
/
grib_to_arrow.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
#include "../src/gribreader.hpp"
#include "../src/gribmessage.hpp"
#include "../src/exceptions/nosuchgribfileexception.hpp"
#include "../src/exceptions/nosuchlocationsfileexception.hpp"
#include "../src/exceptions/arrowtablereadercreationexception.hpp"
#include "../src/exceptions/arrowgenericexception.hpp"
#include "../src/exceptions/invalidcsvexception.hpp"
#include "../src/exceptions/invalidschemaexception.hpp"
#include "../src/exceptions/gribexception.hpp"
#include <cmath>
//#define USE_CMAKE
#ifdef USE_CMAKE
#include "../pybind11/include/pybind11/pybind11.h"
#include "../pybind11/include/pybind11/stl.h"
#include "../pybind11/include/pybind11/chrono.h"
#else
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include "pybind11/chrono.h"
#endif
#include <arrow/api.h>
#include <arrow/python/pyarrow.h>
#include "../src/caster.hpp"
namespace py = pybind11;
using namespace std;
// Python extension module "gribtoarrow".
// Registers the project's exception types and exposes GribReader and
// GribMessage to Python. All docstrings below are surfaced to Python as
// __doc__, so their text is part of the module's observable behaviour.
PYBIND11_MODULE(gribtoarrow, m)
{
    // Call guard attached to most bindings below: the wrapped C++ call is
    // made inside a py::gil_scoped_release scope.
    using ReleaseGil = py::call_guard<py::gil_scoped_release>;
    // Argument type shared by the table-based withLocations/withConversions overloads.
    using ArrowTablePtr = std::shared_ptr<arrow::Table>;

    // ---- Exceptions exposed to Python under the same names -----------------
    py::register_exception<InvalidCSVException>(m, "InvalidCSVException");
    py::register_exception<NoSuchGribFileException>(m, "NoSuchGribFileException");
    py::register_exception<NoSuchLocationsFileException>(m, "NoSuchLocationsFileException");
    py::register_exception<UnableToCreateArrowTableReaderException>(m, "UnableToCreateArrowTableReaderException");
    py::register_exception<ArrowGenericException>(m, "ArrowGenericException");
    py::register_exception<InvalidSchemaException>(m, "InvalidSchemaException");
    py::register_exception<GribException>(m, "GribException");

    // Import pyarrow when this module is loaded; the returned module handle is not kept.
    py::module::import("pyarrow");

    // ---- GribReader --------------------------------------------------------
    py::class_<GribReader> reader(m, "GribReader");

    // Constructor taking a single string argument.
    reader.def(py::init<std::string>(), ReleaseGil(), R"EOL(
Creates a new Grib reader.
Parameters
----------
filepath (str): A string containing the full path of the grib file
)EOL");

    reader.def("withEnabledStationFiltering", &GribReader::withEnabledStationFiltering, ReleaseGil(), R"EOL(
Enables location data filtering to be disabled
Parameters
----------
bool - If True filtering will be done based on the location data and the coorindates of the message
)EOL");

    // withLocations has two overloads; the Arrow-table one is registered first.
    reader.def("withLocations", py::overload_cast<ArrowTablePtr>(&GribReader::withLocations), ReleaseGil(), R"EOL(
Adds locations which will be filtered in each message.
Parameters
----------
locations (pyArrow.Table): A PyArrow table which contains a minimum of 2 columns called lat and lon
The grib will be filtered by any of the coordinates given by lat and lon which are within the grid of
the underlying message.
Any additional columns passed in the table will be passed through in the results when getDataWithLocations
is called on the message. e.g. if you passed a table with the columns "LocationName, Country, lat, lon" then
the fields of LocationName and Country would also be present in the results of the message.
)EOL");

    reader.def("withLocations", py::overload_cast<std::string>(&GribReader::withLocations), ReleaseGil(), R"EOL(
Adds locations which will be filtered in each message.
Parameters
----------
path (string): Path to a csv containing minimum two columns called lat and lon
The grib will be filtered by any of the coordinates given by lat and lon which are within the grid of
the underlying message.
Any additional columns passed in the table will be passed through in the results when getDataWithLocations
is called on the message. e.g. if you passed a table with the columns "LocationName, Country, lat, lon" then
the fields of LocationName and Country would also be present in the results of the message.
)EOL");

    // withConversions has two overloads; the path-based one is registered first.
    reader.def("withConversions", py::overload_cast<std::string>(&GribReader::withConversions), ReleaseGil(), R"EOL(
Adds conversion which be filtered in each message matching message.
Parameters
----------
path (string): Path to a csv containing the following fields (must have a header row and matching column names)
parameterId - integer
addition_value: int / float
subtraction_value: int / float
multiplication_value: int / float
division_value: int / float
ceiling_value: int / float
When the paramterId in the message matches a parameterId in this table the appropriate operation will occur.
e.g. If you wanted to convert from Kelvin to Celcius you would pass a table which contained the parameterId and
contained 273.15 in the column subtraction_value
)EOL");

    reader.def("withConversions", py::overload_cast<ArrowTablePtr>(&GribReader::withConversions), ReleaseGil(), R"EOL(
Adds conversion which be filtered in each message matching message.
Parameters
----------
conversions (pyArrow.Table): A PyArrow table which contains the following columns:
parameterId: arrow::int64()
addition_value: arrow::float64()
subtraction_value: arrow::float64()
multiplication_value: arrow::float64()
division_value: arrow::float64()
ceiling_value: arrow::float64()
When the paramterId in the message matches a parameterId in this table the appropriate operation will occur.
e.g. If you wanted to convert from Kelvin to Celcius you would pass a table which contained the parameterId and
contained 273.15 in the column subtraction_value
)EOL");

    reader.def("withRepeatableIterator", &GribReader::withRepeatableIterator, ReleaseGil(), R"EOL(
Enables the message to be iterated multiple times.
)EOL");

    // Python iteration over the reader's begin()/end() range. No call guard
    // is attached here; keep_alive<0, 1> ties the reader's lifetime to the
    // returned iterator object.
    reader.def(
        "__iter__",
        [](GribReader &self) { return py::make_iterator(self.begin(), self.end()); },
        py::keep_alive<0, 1>());

    // Class-level docstring.
    reader.doc() = R"EOL(
Enables the easy conversion of data in the grib format to Apache Arrow.
The main entry point is a class called GribReader
)EOL";

    // ---- GribMessage -------------------------------------------------------
    py::class_<GribMessage> message(m, "GribMessage");

    // These two bindings carry neither a call guard nor a docstring.
    message.def("getCodesHandleAddress", &GribMessage::getCodesHandleAddress);
    message.def("getObjectAddress", &GribMessage::getObjectAddress);

    message.def("getParameterId", &GribMessage::getParameterId, ReleaseGil(), R"EOL(
The paramId (parameterId of the message)
)EOL");

    message.def("getGribMessageId", &GribMessage::getGribMessageId, ReleaseGil(), R"EOL(
The index of the message in the grib e.g. is this the first message, the 2nd message, the xth message etc..
)EOL");

    message.def("getModelNumber", &GribMessage::getModelNumber, ReleaseGil(), R"EOL(
The model / perturbation Number.
)EOL");

    // Second Python name bound to the same member function as getModelNumber.
    message.def("getPerturbationNumber", &GribMessage::getModelNumber, ReleaseGil(), R"EOL(
The perturbation Number (same thing as model)
)EOL");

    message.def("getStep", &GribMessage::getStep, ReleaseGil(), R"EOL(
The step interval e.g. 3 (often useful with getStepUnits()) e.g units might be h
)EOL");

    message.def("getStepUnits", &GribMessage::getStepUnits, ReleaseGil(), R"EOL(
The step units e.g. h,d,m etc..
)EOL");

    message.def("getShortName", &GribMessage::getShortName, ReleaseGil(), R"EOL(
The shortname e.g. tcc (total cloud cover), tp (total precipitation )
)EOL");

    message.def("getDate", &GribMessage::getDate, ReleaseGil(), R"EOL(
Gets the date of the message as a string
If you don't want to apply any conversion then use getChronoDate which will return a datetime object
)EOL");

    message.def("getDataType", &GribMessage::getDataType, ReleaseGil(), R"EOL(
Gets the dataType of the message as a string
)EOL");

    message.def("getTime", &GribMessage::getTime, ReleaseGil(), R"EOL(
Gets the time of the message as a string
If you don't want to apply any conversion then use getChronoDate which will return a datetime object
)EOL");

    message.def("getDateNumeric", &GribMessage::getDateNumeric, ReleaseGil(), R"EOL(
Gets the date as a number / decimal
If you don't want to apply any conversion then use getChronoDate which will return a datetime object
)EOL");

    message.def("getTimeNumeric", &GribMessage::getTimeNumeric, ReleaseGil(), R"EOL(
Gets the time as a number / decimal
If you don't want to apply any conversion then use getChronoDate which will return a datetime object
)EOL");

    message.def("getChronoDate", &GribMessage::getChronoDate, ReleaseGil(), R"EOL(
Gets the date / time as a python Datetime object
)EOL");

    message.def("getObsDate", &GribMessage::getObsDate, ReleaseGil(), R"EOL(
Gets the Observation date / time as a python Datetime object
)EOL");

    message.def("getLatitudeOfFirstPoint", &GribMessage::getLatitudeOfFirstPoint, ReleaseGil(), R"EOL(
Gets the Latitude of the first point in the message
)EOL");

    message.def("getLongitudeOfFirstPoint", &GribMessage::getLongitudeOfFirstPoint, ReleaseGil(), R"EOL(
Gets the Longitude of the first point in the message
)EOL");

    message.def("getLatitudeOfLastPoint", &GribMessage::getLatitudeOfLastPoint, ReleaseGil(), R"EOL(
Gets the Latitude of the last point in the message
)EOL");

    message.def("getLongitudeOfLastPoint", &GribMessage::getLongitudeOfLastPoint, ReleaseGil(), R"EOL(
Gets the Longitude of the last point in the message
)EOL");

    message.def("getGridDefinitionTemplateNumber", &GribMessage::getGridDefinitionTemplateNumber, ReleaseGil(), R"EOL(
Gets the grid defintion template number
)EOL");

    message.def("getData", &GribMessage::getData, ReleaseGil(), R"EOL(
Gets the Data from the message
Return 3 fields the value and the latitude and longitude or the value
)EOL");

    message.def("getDataWithLocations", &GribMessage::getDataWithLocations, ReleaseGil(), R"EOL(
Return the values constrained by the locations specified in table to restrict by when passed in the reader
)EOL");

    message.def("iScansNegatively", &GribMessage::iScansNegatively, ReleaseGil(), R"EOL(
Return if the i(s) scan negatively in the grid
)EOL");

    message.def("jScansPositively", &GribMessage::jScansPositively, ReleaseGil(), R"EOL(
Return if the j(s) scan positively in the grid
)EOL");

    message.def("getEditionNumber", &GribMessage::getEditionNumber, ReleaseGil(), R"EOL(
Return if the grib version e.g. 1/2
)EOL");

    // The three *ParameterOrDefault bindings take two arguments: the first is
    // declared with py::arg(nullptr) (no keyword name), the second is the
    // keyword "defaultValue" with a type-specific default.
    message.def("getNumericParameterOrDefault", &GribMessage::getNumericParameterOrDefault,
                py::arg(nullptr),
                py::arg("defaultValue") = -9999,
                ReleaseGil(), R"EOL(
Get the key passed to it or if the key is missing returns the user supplied default value
)EOL");

    message.def("getDoubleParameterOrDefault", &GribMessage::getDoubleParameterOrDefault,
                py::arg(nullptr),
                py::arg("defaultValue") = std::nan(""),
                ReleaseGil(), R"EOL(
Get the key passed to it or if the key is missing returns the user supplied default value
)EOL");

    message.def("getStringParameterOrDefault", &GribMessage::getStringParameterOrDefault,
                py::arg(nullptr),
                py::arg("defaultValue") = "",
                ReleaseGil(), R"EOL(
Get the key passed to it or if the key is missing returns the user supplied default value
)EOL");

    message.def("tryGetKey", &GribMessage::tryGetKey, ReleaseGil(), R"EOL(
Get the key passed to it or if the key is missing returns None
)EOL");

    // Class-level docstring.
    message.doc() = R"EOL(
This class provides the ability to access attributes such as the parameterId
There are two methods available to access the data:
getData() - Will return all the data present in the values array for the message even if locations were defined in the reader
getDataWithLocations() - Restricts the results which are within the bounds of the coordinates of the message and the locationss
defined in the reader class
)EOL";
}