Skip to content

Commit

Permalink
chore: provide a warning if the schema contains fields which break CDC
Browse files Browse the repository at this point in the history
  • Loading branch information
rtyler committed Jun 4, 2024
1 parent 2dc1145 commit 27e3d01
Showing 1 changed file with 18 additions and 2 deletions.
20 changes: 18 additions & 2 deletions crates/core/src/operations/cdc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,24 @@ impl CDCTracker {
// Create a new schema which represents the input batch along with the CDC
// columns
let mut fields: Vec<Arc<Field>> = self.schema.fields().to_vec().clone();

let mut has_struct = false;
for field in fields.iter() {
match field.data_type() {
DataType::Struct(_) => {
has_struct = true;
}
DataType::List(_) => {
has_struct = true;
}
_ => {}
}
}

if has_struct {
warn!("The schema contains a Struct or List type, which unfortunately means a change data file cannot be captured in this release of delta-rs: <https://github.com/delta-io/delta-rs/issues/2568>. The write operation will complete properly, but no CDC data will be generated for schema: {fields:?}");
}

fields.push(Arc::new(Field::new("_change_type", DataType::Utf8, true)));
let schema = Arc::new(Schema::new(fields));

Expand Down Expand Up @@ -458,7 +476,6 @@ mod tests {
#[ignore]
#[tokio::test]
async fn test_sanity_check_with_pure_df() {
let _ = pretty_env_logger::try_init();
let nested_schema = Arc::new(Schema::new(vec![
Field::new("id", DataType::Int32, true),
Field::new("lat", DataType::Int32, true),
Expand Down Expand Up @@ -537,7 +554,6 @@ mod tests {
#[ignore]
#[tokio::test]
async fn test_sanity_check_with_struct() {
let _ = pretty_env_logger::try_init();
let nested_schema = Arc::new(Schema::new(vec![
Field::new("id", DataType::Int32, true),
Field::new("lat", DataType::Int32, true),
Expand Down

0 comments on commit 27e3d01

Please sign in to comment.