Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve performance of casting StringView/BinaryView to DictionaryArray #5872

Merged
merged 10 commits into from
Jun 13, 2024
Prev Previous commit
Next Next commit
add comments
  • Loading branch information
XiangpengHao committed Jun 11, 2024
commit 9331077392fa061a7b6a01973cc64094ad760f1f
8 changes: 6 additions & 2 deletions arrow-cast/src/cast/dictionary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -189,24 +189,28 @@ pub(crate) fn cast_to_dictionary<K: ArrowDictionaryKeyType>(
pack_numeric_to_dictionary::<K, Decimal256Type>(array, dict_value_type, cast_options)
}
Utf8 => {
// If the input is a view type, we can avoid casting (thus copying) the data
if array.data_type() == &DataType::Utf8View {
return string_view_to_dictionary::<K, i32>(array);
}
pack_byte_to_dictionary::<K, GenericStringType<i32>>(array, cast_options)
}
LargeUtf8 => {
// If the input is a view type, we can avoid casting (thus copying) the data
if array.data_type() == &DataType::Utf8View {
return string_view_to_dictionary::<K, i64>(array);
}
pack_byte_to_dictionary::<K, GenericStringType<i64>>(array, cast_options)
}
Binary => {
// If the input is a view type, we can avoid casting (thus copying) the data
if array.data_type() == &DataType::BinaryView {
return binary_view_to_dictionary::<K, i32>(array);
}
pack_byte_to_dictionary::<K, GenericBinaryType<i32>>(array, cast_options)
}
LargeBinary => {
// If the input is a view type, we can avoid casting (thus copying) the data
if array.data_type() == &DataType::BinaryView {
return binary_view_to_dictionary::<K, i64>(array);
}
Expand Down Expand Up @@ -269,7 +273,7 @@ where
}
}

return Ok(Arc::new(b.finish()));
Ok(Arc::new(b.finish()))
}

pub(crate) fn binary_view_to_dictionary<K, O: OffsetSizeTrait>(
Expand All @@ -295,7 +299,7 @@ where
}
}

return Ok(Arc::new(b.finish()));
Ok(Arc::new(b.finish()))
}

// Packs the data as a GenericByteDictionaryBuilder, if possible, with the
Expand Down
Loading