Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ARROW-3729: [C++][Parquet] Use logical annotations in Arrow Parquet reader/writer #4421

Closed
wants to merge 9 commits into from
Prev Previous commit
Next Next commit
Set Parquet isAdjustedToUTC to true for timestamps with non-empty tim…
…ezones
  • Loading branch information
tpboudreau committed Jun 18, 2019
commit b5ebdbdb7f26ab86817c0303ba9fe576a62df095
2 changes: 1 addition & 1 deletion cpp/src/parquet/arrow/arrow-reader-writer-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ std::shared_ptr<const LogicalAnnotation> get_logical_annotation(const ::DataType
return LogicalAnnotation::Date();
case ArrowId::TIMESTAMP: {
const auto& ts_type = static_cast<const ::arrow::TimestampType&>(type);
const bool adjusted_to_utc = (ts_type.timezone() == "UTC");
const bool adjusted_to_utc = !(ts_type.timezone().empty());
switch (ts_type.unit()) {
case TimeUnit::MILLI:
return LogicalAnnotation::Timestamp(adjusted_to_utc,
Expand Down
6 changes: 3 additions & 3 deletions cpp/src/parquet/arrow/arrow-schema-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -885,13 +885,13 @@ TEST_F(TestConvertArrowSchema, ArrowFields) {
LogicalAnnotation::Timestamp(true, LogicalAnnotation::TimeUnit::NANOS),
ParquetType::INT64, -1},
{"timestamp(millisecond, CET)", ::arrow::timestamp(::arrow::TimeUnit::MILLI, "CET"),
LogicalAnnotation::Timestamp(false, LogicalAnnotation::TimeUnit::MILLIS),
LogicalAnnotation::Timestamp(true, LogicalAnnotation::TimeUnit::MILLIS),
ParquetType::INT64, -1},
{"timestamp(microsecond, CET)", ::arrow::timestamp(::arrow::TimeUnit::MICRO, "CET"),
LogicalAnnotation::Timestamp(false, LogicalAnnotation::TimeUnit::MICROS),
LogicalAnnotation::Timestamp(true, LogicalAnnotation::TimeUnit::MICROS),
ParquetType::INT64, -1},
{"timestamp(nanosecond, CET)", ::arrow::timestamp(::arrow::TimeUnit::NANO, "CET"),
LogicalAnnotation::Timestamp(false, LogicalAnnotation::TimeUnit::NANOS),
LogicalAnnotation::Timestamp(true, LogicalAnnotation::TimeUnit::NANOS),
ParquetType::INT64, -1},
{"null", ::arrow::null(), LogicalAnnotation::Null(), ParquetType::INT32, -1}};

Expand Down
10 changes: 2 additions & 8 deletions cpp/src/parquet/arrow/schema.cc
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ static Status MakeArrowTime64(const std::shared_ptr<const LogicalAnnotation>& an
static Status MakeArrowTimestamp(
const std::shared_ptr<const LogicalAnnotation>& annotation,
std::shared_ptr<ArrowType>* out) {
static constexpr auto utc = "UTC";
static const char* utc = "UTC";
const auto& timestamp = checked_cast<const TimestampAnnotation&>(*annotation);
switch (timestamp.time_unit()) {
case LogicalAnnotation::TimeUnit::MILLIS:
Expand Down Expand Up @@ -520,15 +520,9 @@ Status StructToNode(const std::shared_ptr<::arrow::StructType>& type,
return Status::OK();
}

static bool HasUTCTimezone(const std::string& timezone) {
static const std::vector<std::string> utczones{"UTC", "utc"};
return std::any_of(utczones.begin(), utczones.end(),
[timezone](const std::string& utc) { return timezone == utc; });
}

static std::shared_ptr<const LogicalAnnotation> TimestampAnnotationFromArrowTimestamp(
const ::arrow::TimestampType& timestamp_type, ::arrow::TimeUnit::type time_unit) {
const bool utc = HasUTCTimezone(timestamp_type.timezone());
const bool utc = !(timestamp_type.timezone().empty());
switch (time_unit) {
case ::arrow::TimeUnit::MILLI:
return LogicalAnnotation::Timestamp(utc, LogicalAnnotation::TimeUnit::MILLIS);
Expand Down