Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Dedup strategy that keeps the last not null field #4184

Merged
merged 10 commits into from
Jun 25, 2024
Prev Previous commit
Next Next commit
refactor: rename fields
  • Loading branch information
evenyag committed Jun 21, 2024
commit 799f11fa448c6b5ad35543413932a12b655936c7
25 changes: 13 additions & 12 deletions src/mito2/src/read/dedup.rs
Original file line number Diff line number Diff line change
Expand Up @@ -233,10 +233,10 @@ struct LastFieldsBuilder {
/// Only initializes this field when `skip_merge()` is false.
last_fields: Vec<Value>,
/// Whether the last row (including `last_fields`) has null field.
- /// Only sets this field when `has_delete` is false.
- has_null: bool,
+ /// Only sets this field when `contains_deletion` is false.
+ contains_null: bool,
/// Whether the last row has delete op. If true, skips merging fields.
- has_delete: bool,
+ contains_deletion: bool,
/// Whether the builder is initialized.
initialized: bool,
}
Expand All @@ -248,8 +248,8 @@ impl LastFieldsBuilder {
filter_deleted,
builders: Vec::new(),
last_fields: Vec::new(),
- has_null: false,
- has_delete: false,
+ contains_null: false,
+ contains_deletion: false,
initialized: false,
}
}
Expand All @@ -268,10 +268,11 @@ impl LastFieldsBuilder {
let last_idx = batch.num_rows() - 1;
let fields = batch.fields();
// Safety: The last_idx is valid.
- self.has_delete = batch.op_types().get_data(last_idx).unwrap() == OpType::Delete as u8;
+ self.contains_deletion =
+     batch.op_types().get_data(last_idx).unwrap() == OpType::Delete as u8;
// If the row has been deleted, then we don't need to merge fields.
- if !self.has_delete {
-     self.has_null = fields.iter().any(|col| col.data.is_null(last_idx));
+ if !self.contains_deletion {
+     self.contains_null = fields.iter().any(|col| col.data.is_null(last_idx));
}

if self.skip_merge() {
Expand All @@ -292,7 +293,7 @@ impl LastFieldsBuilder {
debug_assert!(self.initialized);

// No null field or the row has been deleted, no need to merge.
- self.has_delete || !self.has_null
+ self.contains_deletion || !self.contains_null
}

/// Pushes first row of a batch to the builder.
Expand All @@ -313,7 +314,7 @@ impl LastFieldsBuilder {
}
}
// Updates the flag.
- self.has_null = self.last_fields.iter().any(Value::is_null);
+ self.contains_null = self.last_fields.iter().any(Value::is_null);
}

/// Merges last not null fields, builds a new batch and resets the builder.
Expand Down Expand Up @@ -377,8 +378,8 @@ impl LastFieldsBuilder {
/// Clears the builder.
fn clear(&mut self) {
self.last_fields.clear();
- self.has_null = false;
- self.has_delete = false;
+ self.contains_null = false;
+ self.contains_deletion = false;
self.initialized = false;
}
}
Expand Down
Loading