Skip to content

Commit

Permalink
added large_dtype to to schema convert
Browse files Browse the repository at this point in the history
  • Loading branch information
sherlockbeard authored and rtyler committed Jun 29, 2024
1 parent 4b00dfd commit d8a244f
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 1 deletion.
2 changes: 1 addition & 1 deletion python/deltalake/writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,7 @@ def write_deltalake(
partition_by = [partition_by]

if isinstance(schema, DeltaSchema):
schema = schema.to_pyarrow(as_large_types=True)
schema = schema.to_pyarrow(as_large_types=large_dtypes)

if isinstance(data, RecordBatchReader):
data = convert_pyarrow_recordbatchreader(data, large_dtypes)
Expand Down
24 changes: 24 additions & 0 deletions python/tests/test_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -1535,6 +1535,30 @@ def test_with_deltalake_schema(tmp_path: pathlib.Path, sample_data: pa.Table):
assert delta_table.schema().to_pyarrow() == sample_data.schema


def test_with_deltalake_json_schema(tmp_path: pathlib.Path):
json_schema = '{"type": "struct","fields": [{"name": "campaign", "type": "string", "nullable": true, "metadata": {}},{"name": "account", "type": "string", "nullable": true, "metadata": {}}]}'
table_schema = Schema.from_json(json_schema)
table = pa.table(
{
"campaign": pa.array([]),
"account": pa.array([]),
}
)
write_deltalake(tmp_path, table, schema=table_schema)
table = pa.table(
{
"campaign": pa.array(["deltaLake"]),
"account": pa.array(["admin"]),
}
)

write_deltalake(tmp_path, data=table, schema=table_schema, mode="append")

delta_table = DeltaTable(tmp_path)
assert delta_table.schema() == table_schema
assert delta_table.to_pyarrow_table() == table


def test_write_stats_empty_rowgroups(tmp_path: pathlib.Path):
# https://github.com/delta-io/delta-rs/issues/2169
data = pa.table(
Expand Down

0 comments on commit d8a244f

Please sign in to comment.