Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Hyrax 1295 -draft #942

Merged
merged 10 commits into from
Jun 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
237 changes: 234 additions & 3 deletions modules/dmrpp_module/DMZ.cc
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
#include "Chunk.h"
#include "DmrppCommon.h"
#include "DmrppArray.h"
#include "DmrppStructure.h"
#include "DmrppStr.h"
#include "DmrppUrl.h"
#include "DmrppD4Group.h"
Expand Down Expand Up @@ -1517,6 +1518,229 @@ DMZ::process_missing_data(BaseType *btp, const xml_node &missing_data)

}

/**
 * @brief Check that every member of a constructor can be handled as special structure data.
 *
 * A member is accepted when its value type is a simple type other than URL,
 * Enum and Opaque. For an array member the value type is the array's element
 * type; for any other member it is the member's own type.
 *
 * @param var_ctor The constructor (structure) whose members are examined.
 * @return True when every member is supported, false as soon as one is not.
 */
bool
DMZ::supported_special_structure_type_internal(Constructor *var_ctor) {

    // Range-based iteration over the constructor's member list.
    for (auto bt : var_ctor->variables()) {

        Type value_type = bt->type();

        if (value_type == dods_array_c) {
            // For arrays, the element type decides whether the member is supported.
            auto t_a = dynamic_cast<Array *>(bt);
            if (!t_a || !t_a->var())
                return false;   // Cannot inspect the element type; treat as unsupported.
            value_type = t_a->var()->type();
        }

        // Only support array or scalar of float/int/string.
        if (!libdap::is_simple_type(value_type))
            return false;

        if (value_type == dods_url_c || value_type == dods_enum_c || value_type == dods_opaque_c)
            return false;
    }

    return true;

}

/**
 * @brief Decide whether a variable can be stored as special structure data.
 *
 * Only a structure, or an array whose element type is a structure, is
 * considered. The members of that structure are then checked by
 * supported_special_structure_type_internal().
 *
 * @param btp The variable to test.
 * @return True for a structure (or array of structures) whose members are all
 * supported; false otherwise.
 * @throws BESInternalError if the structure cannot be cast to a Constructor.
 */
bool
DMZ::supported_special_structure_type(BaseType *btp)
{
    Type t = btp->type();
    bool is_structure = (t == dods_structure_c);
    bool is_structure_array = (t == dods_array_c && btp->var()->type() == dods_structure_c);

    if (!is_structure && !is_structure_array)
        return false;

    // The object holding the members: the variable itself, or the array's element template.
    BaseType *structure_var = is_structure ? btp : btp->var();

    auto var_constructor = dynamic_cast<Constructor*>(structure_var);
    if (!var_constructor) {
        // Name the type of the object that was actually cast. Calling btp->var()
        // on a plain structure is not guaranteed to return an object, so it must
        // not be dereferenced unconditionally here.
        throw BESInternalError(
                prolog + "Failed to cast " + structure_var->type_name() + " " + btp->name() +
                " to an instance of Constructor." , __FILE__, __LINE__);
    }

    return supported_special_structure_type_internal(var_constructor);

}

/**
 * @brief Load the values of a structure, or an array of structures, from a
 * dmrpp:specialstructuredata element.
 *
 * The element's text is base64 decoded into a byte buffer. For an array, one
 * structure instance per array element is filled from consecutive parts of
 * that buffer and stored in the array; for a scalar structure the variable
 * itself is filled. The variable is marked as read afterwards.
 *
 * @param btp The structure or array-of-structure variable to fill.
 * @param special_structure_data The XML node holding the encoded values.
 * @throws BESInternalError if the variable type is not supported, the node has
 * no character data, the array cannot be accessed as a DmrppArray, or the
 * array is subset.
 * @throws InternalErr if a structure instance cannot be obtained.
 */
void
DMZ::process_special_structure_data(BaseType *btp, const xml_node &special_structure_data)
{
    BESDEBUG(PARSER, prolog << "Coming to process_special_structure_data() " << endl);

    if (supported_special_structure_type(btp) == false)
        throw BESInternalError("The dmrpp::the datatype is not a supported special structure variable", __FILE__, __LINE__);

    auto char_data = special_structure_data.child_value();
    if (!char_data)
        throw BESInternalError("The dmrpp::special_structure_data doesn't contain special structure data values.",__FILE__,__LINE__);

    std::vector <u_int8_t> values = base64::Base64::decode(char_data);
    size_t total_value_size = values.size();

    // Running position in 'values'; advanced by the per-structure decoder.
    size_t values_offset = 0;

    if (btp->type() == dods_array_c) {

        auto ar = dynamic_cast<DmrppArray *>(btp);
        if (!ar)
            throw BESInternalError("The dmrpp::special_structure_data array is not an instance of DmrppArray.",__FILE__,__LINE__);

        if (ar->is_projected())
            throw BESInternalError("The dmrpp::currently we don't support subsetting of special_structure_data.",__FILE__,__LINE__);

        int64_t nelms = ar->length_ll();

        for (int64_t element = 0; element < nelms; ++element) {

            // Work on a private copy of the element template for each array element.
            BaseType *duplicate = ar->var()->ptr_duplicate();
            auto dmrpp_s = dynamic_cast<DmrppStructure*>(duplicate);
            if (!dmrpp_s) {
                delete duplicate;   // Do not leak the copy when the cast fails.
                throw InternalErr(__FILE__, __LINE__, "Cannot obtain the structure pointer.");
            }

            try {
                process_special_structure_data_internal(dmrpp_s, values, total_value_size, values_offset);
                ar->set_vec_ll(static_cast<uint64_t>(element), dmrpp_s);
            }
            catch (...) {
                delete dmrpp_s;     // Release the copy before propagating the error.
                throw;
            }

            // The copy is released here as in the original code; presumably
            // set_vec_ll() keeps its own duplicate - TODO confirm.
            delete dmrpp_s;
        }
    }
    else {

        auto dmrpp_s = dynamic_cast<DmrppStructure*>(btp);
        if (!dmrpp_s)
            throw InternalErr(__FILE__, __LINE__, "Cannot obtain the structure pointer.");
        process_special_structure_data_internal(dmrpp_s, values, total_value_size, values_offset);
    }

    btp->set_read_p(true);

}

void DMZ::process_special_structure_data_internal(DmrppStructure * dmrpp_s, std::vector<u_int8_t> &values , size_t total_value_size, size_t & values_offset){

Constructor::Vars_iter vi = dmrpp_s->var_begin();
Constructor::Vars_iter ve = dmrpp_s->var_end();

for (; vi != ve; vi++) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Another loop here.

BaseType *bt = *vi;
Type t_bt = bt->type();
if (libdap::is_simple_type(t_bt) && t_bt != dods_str_c && t_bt != dods_url_c && t_bt!= dods_enum_c && t_bt!=dods_opaque_c) {

BESDEBUG("dmrpp", "var name is: " << bt->name() << "'" << endl);
BESDEBUG("dmrpp", "var values_offset is: " << values_offset << "'" << endl);
bt->val2buf(values.data() + values_offset);
values_offset += bt->width_ll();
}
else if (t_bt == dods_str_c) {
BESDEBUG("dmrpp", "var string name is: " << bt->name() << "'" << endl);
BESDEBUG("dmrpp", "var string values_offset is: " << values_offset << "'" << endl);
if (total_value_size < values_offset)
throw InternalErr(__FILE__, __LINE__, "The offset of the retrieved value is out of the boundary.");
size_t rest_buf_size = total_value_size - values_offset;
u_int8_t* start_pointer = values.data() + values_offset;
vector<char>temp_buf;
temp_buf.resize(rest_buf_size);
memcpy(temp_buf.data(),(void*)start_pointer,rest_buf_size);
// find the index of first ";", the separator
size_t string_stop_index =0;
vector<char> string_value;
for (size_t i = 0; i <rest_buf_size; i++) {
if(temp_buf[i] == ';') {
string_stop_index = i;
break;
}
else
string_value.push_back(temp_buf[i]);
}
string encoded_str(string_value.begin(),string_value.end());
vector <u_int8_t> decoded_str = base64::Base64::decode(encoded_str);
vector <char> decoded_vec;
decoded_vec.resize(decoded_str.size());
memcpy(decoded_vec.data(),(void*)decoded_str.data(),decoded_str.size());
string final_str(decoded_vec.begin(),decoded_vec.end());
bt->val2buf(&final_str);
values_offset = values_offset + string_stop_index+1;
}

else if (t_bt == dods_array_c) {
BESDEBUG("dmrpp", "var array name is: " << bt->name() << "'" << endl);
BESDEBUG("dmrpp", "var array values_offset is: " << values_offset << "'" << endl);

auto t_a = dynamic_cast<Array *>(bt);
Type ar_basetype = t_a->var()->type();
if (libdap::is_simple_type(ar_basetype) && ar_basetype != dods_str_c && ar_basetype != dods_url_c && ar_basetype!= dods_enum_c && ar_basetype!=dods_opaque_c) {
bt->val2buf(values.data() + values_offset);
values_offset += bt->width_ll();
}
else if (ar_basetype == dods_str_c) {

if(total_value_size < values_offset)
throw InternalErr(__FILE__, __LINE__, "The offset of the retrieved value is out of the boundary.");

size_t rest_buf_size = total_value_size - values_offset;
u_int8_t* start_pointer = values.data() + values_offset;
vector<char>temp_buf;
temp_buf.resize(rest_buf_size);
memcpy(temp_buf.data(),(void*)start_pointer,rest_buf_size);

int64_t num_ar_elems = t_a->length_ll();

// We need to create a vector of string to pass the string array.
// Each string's encoded value is separated by ';'.
vector<string> encoded_str;
encoded_str.resize(num_ar_elems);

unsigned int str_index = 0;
size_t string_stop_index = 0;
for (size_t i = 0; i <rest_buf_size; i++) {
if(temp_buf[i] != ';')
encoded_str[str_index].push_back(temp_buf[i]);
else {
str_index++;
if (str_index == num_ar_elems) {
string_stop_index = i;
break;
}
}
}

vector<string> final_str;
final_str.resize(num_ar_elems);

// decode the encoded string
for (size_t i = 0; i <num_ar_elems; i++) {

string temp_encoded_str(encoded_str[i].begin(),encoded_str[i].end());
vector <u_int8_t> decoded_str = base64::Base64::decode(temp_encoded_str);
vector <char> decoded_vec;
decoded_vec.resize(decoded_str.size());
memcpy(decoded_vec.data(),(void*)decoded_str.data(),decoded_str.size());
string temp_final_str(decoded_vec.begin(),decoded_vec.end());
final_str[i] = temp_final_str;
}

t_a->set_value_ll(final_str,num_ar_elems);
values_offset = values_offset + string_stop_index+1;

}
else
throw InternalErr(__FILE__, __LINE__, "The base type of this structure is not integer or float or string. Currently it is not supported.");
}
}
dmrpp_s->set_read_p(true);

}


/**
Expand Down Expand Up @@ -1870,6 +2094,7 @@ void DMZ::load_chunks(BaseType *btp)
int compact_found = 0;
int vlsa_found = 0;
int missing_data_found = 0;
int special_structure_data_found = 0;

// Chunked data
if (process_chunks(btp, var_node)) {
Expand Down Expand Up @@ -1993,18 +2218,24 @@ void DMZ::load_chunks(BaseType *btp)
process_missing_data(btp, missing_data);
}

auto special_structure_data = var_node.child("dmrpp:specialstructuredata");
if (special_structure_data) {
special_structure_data_found = 1;
process_special_structure_data(btp, special_structure_data);
}

auto vlsa_element = var_node.child(DMRPP_VLSA_ELEMENT);
if (vlsa_element) {
vlsa_found = 1;
process_vlsa(btp, vlsa_element);
}

// Here we (optionally) check that exactly one of the three types of node was found
// Here we (optionally) check that exactly one of the supported types of node was found
if (DmrppRequestHandler::d_require_chunks) {
int elements_found = chunks_found + chunk_found + compact_found + vlsa_found + missing_data_found;
int elements_found = chunks_found + chunk_found + compact_found + vlsa_found + missing_data_found + special_structure_data_found;
if (elements_found != 1) {
ostringstream oss;
oss << "Expected chunk, chunks or compact or variable length string or missing data information in the DMR++ data. Found " << elements_found
oss << "Expected chunk, chunks or compact or variable length string or missing data or special structure data information in the DMR++ data. Found " << elements_found
<< " types of nodes.";
throw BESInternalError(oss.str(), __FILE__, __LINE__);
}
Expand Down
5 changes: 5 additions & 0 deletions modules/dmrpp_module/DMZ.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#include <pugixml.hpp>

#include <libdap/Type.h>
#include <DmrppStructure.h>

namespace libdap {
class DMR;
Expand Down Expand Up @@ -99,6 +100,10 @@ class DMZ {

static void process_compact(libdap::BaseType *btp, const pugi::xml_node &compact);
static void process_missing_data(libdap::BaseType *btp, const pugi::xml_node &missing_data);
static void process_special_structure_data(libdap::BaseType *btp, const pugi::xml_node &special_structure_data);
static void process_special_structure_data_internal(DmrppStructure * dmrpp_s, std::vector<u_int8_t> &values , size_t total_value_size, size_t & values_offset);
static bool supported_special_structure_type(libdap::BaseType *btp);
static bool supported_special_structure_type_internal(libdap::Constructor *var_ctor);
static void process_vlsa(libdap::BaseType *btp, const pugi::xml_node &vlsa_element);

static pugi::xml_node get_variable_xml_node_helper(const pugi::xml_node &var_node, std::stack<libdap::BaseType*> &bt);
Expand Down
19 changes: 18 additions & 1 deletion modules/dmrpp_module/DmrppArray.cc
Original file line number Diff line number Diff line change
Expand Up @@ -678,7 +678,7 @@ DmrppArray::operator=(const DmrppArray &rhs)
* @brief Is this Array subset?
* @return True if the array has a projection expression, false otherwise
*/
bool DmrppArray::is_projected()
bool DmrppArray::is_projected()
{
for (Dim_iter p = dim_begin(), e = dim_end(); p != e; ++p)
if (dimension_size_ll(p, true) != dimension_size_ll(p, false)) return true;
Expand Down Expand Up @@ -2889,6 +2889,16 @@ void missing_data_xml_element(const XMLWriter &xml, DmrppArray *da) {
}
}

/**
 * @brief Write the special structure data element for an array of structures.
 *
 * The array's structure string buffer is base64 encoded and written through
 * print_special_structure_element(). Nothing is written when the array's
 * element type is not a structure.
 *
 * @param xml The XML writer to print to.
 * @param da The array whose buffered structure data is written.
 */
void special_structure_array_data_xml_element(const XMLWriter &xml, DmrppArray *da) {

    if (da->var()->type() != dods_structure_c)
        return;

    // Bind to the array's buffer instead of copying it; it is only read here.
    vector<char> &struct_array_str_buf = da->get_structure_array_str_buffer();

    string final_encoded_str = base64::Base64::encode(reinterpret_cast<uint8_t *>(struct_array_str_buf.data()),
                                                      struct_array_str_buf.size());
    da->print_special_structure_element(xml, DmrppCommon::d_ns_prefix, final_encoded_str);

}

/**
* @brief Print information about one dimension of the array.
* @param xml
Expand Down Expand Up @@ -3011,6 +3021,13 @@ void DmrppArray::print_dap4(XMLWriter &xml, bool constrained /*false*/) {
missing_data_xml_element(xml, this);
}

// Special structure string array.
if (DmrppCommon::d_print_chunks && get_special_structure_flag() && read_p()) {
special_structure_array_data_xml_element(xml, this);
}



// Is it an array of strings? Those have issues so we treat them special.
if (var()->type() == dods_str_c) {
if (is_flsa() && DmrppCommon::d_print_chunks) {
Expand Down
11 changes: 10 additions & 1 deletion modules/dmrpp_module/DmrppArray.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,13 +86,16 @@ class DmrppArray : public libdap::Array, public dmrpp::DmrppCommon {
bool is_variable_length_string_array = false;
bool is_fixed_length_string_array = false;


// In the dmr++ XML:
// <dmrpp:fStringArray string_length="##" pad="null_pad | null_term | space_pad" />
unsigned long long d_fixed_str_length = 0;
string_pad_type d_fixed_length_string_pad_type = not_set;
vector<u_int8_t> d_compact_str_buf;

bool is_projected();
vector<char> d_structure_array_str_buf;
bool is_special_structure = false;


DmrppArray::dimension get_dimension(unsigned int dim_num);

Expand Down Expand Up @@ -232,6 +235,12 @@ class DmrppArray : public libdap::Array, public dmrpp::DmrppCommon {
unsigned int buf2val(void **val) override;
vector<u_int8_t> &compact_str_buffer(){ return d_compact_str_buf; }

vector<char> & get_structure_array_str_buffer() { return d_structure_array_str_buf;}

void set_special_structure_flag(bool is_special_struct) {is_special_structure = is_special_struct;}
bool get_special_structure_flag() { return is_special_structure;}
bool is_projected();

};

/**
Expand Down
11 changes: 11 additions & 0 deletions modules/dmrpp_module/DmrppCommon.cc
Original file line number Diff line number Diff line change
Expand Up @@ -585,6 +585,17 @@ DmrppCommon::print_missing_data_element(const XMLWriter &xml, const string &name
throw BESInternalError("Could not write missingdata element.", __FILE__, __LINE__);
}

void
DmrppCommon::print_special_structure_element(const XMLWriter &xml, const string &name_space, const std::string &encoded) const
{

if (xmlTextWriterWriteElementNS(xml.get_writer(), (const xmlChar *) name_space.c_str(),
(const xmlChar *) "specialstructuredata", nullptr,
(const xmlChar *) encoded.c_str()) < 0)
throw BESInternalError("Could not write special structure element.", __FILE__, __LINE__);

}

/**
* @brief Print the DMR++ missing data XML element
* @param xml
Expand Down
1 change: 1 addition & 0 deletions modules/dmrpp_module/DmrppCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,7 @@ class DmrppCommon {
void print_compact_element(libdap::XMLWriter &xml, const std::string &name_space = "", const std::string &encoded = "") const;
void print_missing_data_element(const libdap::XMLWriter &xml, const std::string &name_space = "", const std::string &encoded = "") const;
void print_missing_data_element(const libdap::XMLWriter &xml, const std::string &name_space, const char *data, int length) const;
void print_special_structure_element(const libdap::XMLWriter &xml, const std::string &name_space = "", const std::string &encoded = "") const;

void print_dmrpp(libdap::XMLWriter &writer, bool constrained = false);

Expand Down
Loading