Skip to content

Commit

Permalink
Set Parquet isAdjustedToUTC to true for timestamps with non-empty tim…
Browse files Browse the repository at this point in the history
…ezones
  • Loading branch information
tpboudreau committed Jun 18, 2019
1 parent c5de420 commit b5ebdbd
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 12 deletions.
2 changes: 1 addition & 1 deletion cpp/src/parquet/arrow/arrow-reader-writer-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ std::shared_ptr<const LogicalAnnotation> get_logical_annotation(const ::DataType
return LogicalAnnotation::Date();
case ArrowId::TIMESTAMP: {
const auto& ts_type = static_cast<const ::arrow::TimestampType&>(type);
const bool adjusted_to_utc = (ts_type.timezone() == "UTC");
const bool adjusted_to_utc = !(ts_type.timezone().empty());
switch (ts_type.unit()) {
case TimeUnit::MILLI:
return LogicalAnnotation::Timestamp(adjusted_to_utc,
Expand Down
6 changes: 3 additions & 3 deletions cpp/src/parquet/arrow/arrow-schema-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -885,13 +885,13 @@ TEST_F(TestConvertArrowSchema, ArrowFields) {
LogicalAnnotation::Timestamp(true, LogicalAnnotation::TimeUnit::NANOS),
ParquetType::INT64, -1},
{"timestamp(millisecond, CET)", ::arrow::timestamp(::arrow::TimeUnit::MILLI, "CET"),
LogicalAnnotation::Timestamp(false, LogicalAnnotation::TimeUnit::MILLIS),
LogicalAnnotation::Timestamp(true, LogicalAnnotation::TimeUnit::MILLIS),
ParquetType::INT64, -1},
{"timestamp(microsecond, CET)", ::arrow::timestamp(::arrow::TimeUnit::MICRO, "CET"),
LogicalAnnotation::Timestamp(false, LogicalAnnotation::TimeUnit::MICROS),
LogicalAnnotation::Timestamp(true, LogicalAnnotation::TimeUnit::MICROS),
ParquetType::INT64, -1},
{"timestamp(nanosecond, CET)", ::arrow::timestamp(::arrow::TimeUnit::NANO, "CET"),
LogicalAnnotation::Timestamp(false, LogicalAnnotation::TimeUnit::NANOS),
LogicalAnnotation::Timestamp(true, LogicalAnnotation::TimeUnit::NANOS),
ParquetType::INT64, -1},
{"null", ::arrow::null(), LogicalAnnotation::Null(), ParquetType::INT32, -1}};

Expand Down
10 changes: 2 additions & 8 deletions cpp/src/parquet/arrow/schema.cc
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ static Status MakeArrowTime64(const std::shared_ptr<const LogicalAnnotation>& an
static Status MakeArrowTimestamp(
const std::shared_ptr<const LogicalAnnotation>& annotation,
std::shared_ptr<ArrowType>* out) {
static constexpr auto utc = "UTC";
static const char* utc = "UTC";
const auto& timestamp = checked_cast<const TimestampAnnotation&>(*annotation);
switch (timestamp.time_unit()) {
case LogicalAnnotation::TimeUnit::MILLIS:
Expand Down Expand Up @@ -520,15 +520,9 @@ Status StructToNode(const std::shared_ptr<::arrow::StructType>& type,
return Status::OK();
}

static bool HasUTCTimezone(const std::string& timezone) {
static const std::vector<std::string> utczones{"UTC", "utc"};
return std::any_of(utczones.begin(), utczones.end(),
[timezone](const std::string& utc) { return timezone == utc; });
}

static std::shared_ptr<const LogicalAnnotation> TimestampAnnotationFromArrowTimestamp(
const ::arrow::TimestampType& timestamp_type, ::arrow::TimeUnit::type time_unit) {
const bool utc = HasUTCTimezone(timestamp_type.timezone());
const bool utc = !(timestamp_type.timezone().empty());
switch (time_unit) {
case ::arrow::TimeUnit::MILLI:
return LogicalAnnotation::Timestamp(utc, LogicalAnnotation::TimeUnit::MILLIS);
Expand Down

0 comments on commit b5ebdbd

Please sign in to comment.