Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: fail fast for opening non-existent path #1917

Merged
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 38 additions & 4 deletions crates/deltalake-core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -135,18 +135,22 @@ pub mod test_utils;

/// Creates and loads a DeltaTable from the given path with current metadata.
/// Infers the storage backend to use from the scheme in the given table path.
///
/// Will fail fast if specified `table_uri` is a local path but doesn't exist.
pub async fn open_table(table_uri: impl AsRef<str>) -> Result<DeltaTable, DeltaTableError> {
let table = DeltaTableBuilder::from_uri(table_uri).load().await?;
let table = DeltaTableBuilder::from_valid_uri(table_uri)?.load().await?;
Ok(table)
}

/// Same as `open_table`, but also accepts storage options to aid in building the table for a deduced
/// `StorageService`.
///
/// Will fail fast if specified `table_uri` is a local path but doesn't exist.
pub async fn open_table_with_storage_options(
table_uri: impl AsRef<str>,
storage_options: HashMap<String, String>,
) -> Result<DeltaTable, DeltaTableError> {
let table = DeltaTableBuilder::from_uri(table_uri)
let table = DeltaTableBuilder::from_valid_uri(table_uri)?
.with_storage_options(storage_options)
.load()
.await?;
Expand All @@ -155,11 +159,13 @@ pub async fn open_table_with_storage_options(

/// Creates a DeltaTable from the given path and loads it with the metadata from the given version.
/// Infers the storage backend to use from the scheme in the given table path.
///
/// Will fail fast if specified `table_uri` is a local path but doesn't exist.
pub async fn open_table_with_version(
table_uri: impl AsRef<str>,
version: i64,
) -> Result<DeltaTable, DeltaTableError> {
let table = DeltaTableBuilder::from_uri(table_uri)
let table = DeltaTableBuilder::from_valid_uri(table_uri)?
.with_version(version)
.load()
.await?;
Expand All @@ -169,11 +175,13 @@ pub async fn open_table_with_version(
/// Creates a DeltaTable from the given path.
/// Loads metadata from the version appropriate based on the given ISO-8601/RFC-3339 timestamp.
/// Infers the storage backend to use from the scheme in the given table path.
///
/// Will fail fast if specified `table_uri` is a local path but doesn't exist.
pub async fn open_table_with_ds(
table_uri: impl AsRef<str>,
ds: impl AsRef<str>,
) -> Result<DeltaTable, DeltaTableError> {
let table = DeltaTableBuilder::from_uri(table_uri)
let table = DeltaTableBuilder::from_valid_uri(table_uri)?
.with_datestring(ds)?
.load()
.await?;
Expand Down Expand Up @@ -680,4 +688,30 @@ mod tests {
),]
);
}

#[tokio::test()]
#[should_panic(expected = "does not exist or you don't have access!")]
async fn test_fail_fast_on_not_existing_path() {
use std::path::Path as FolderPath;

let path_str = "./tests/data/folder_doesnt_exist";

// Check that there is no such path at the beginning
let path_doesnt_exist = !FolderPath::new(path_str).exists();
assert!(path_doesnt_exist);

match crate::open_table(path_str).await {
Ok(table) => Ok(table),
Err(e) => {
let path_still_doesnt_exist = !FolderPath::new(path_str).exists();
assert!(
path_still_doesnt_exist,
"Path still doesn't exist, empty folder wasn't created"
);

Err(e)
}
}
.unwrap();
}
}
64 changes: 44 additions & 20 deletions crates/deltalake-core/src/table/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,11 @@ impl DeltaTableLoadOptions {
}
}

enum UriType {
LocalPath(PathBuf),
Url(Url),
}

/// builder for configuring a delta table load.
#[derive(Debug)]
pub struct DeltaTableBuilder {
Expand All @@ -154,6 +159,19 @@ impl DeltaTableBuilder {
}
}

/// Creates `DeltaTableBuilder` from verified table uri.
/// Will fail fast if specified `table_uri` is a local path but doesn't exist.
pub fn from_valid_uri(table_uri: impl AsRef<str>) -> DeltaResult<Self> {
let table_uri = table_uri.as_ref();

if let UriType::LocalPath(path) = resolve_uri_type(table_uri)? {
if !path.exists() {
panic!("Path \"{table_uri}\" does not exist or you don't have access!");
}
}

Ok(DeltaTableBuilder::from_uri(table_uri))
}
/// Sets `require_tombstones=false` to the builder
pub fn without_tombstones(mut self) -> Self {
self.options.require_tombstones = false;
Expand Down Expand Up @@ -391,6 +409,30 @@ lazy_static::lazy_static! {
]);
}

/// Utility function to figure out whether string representation of the path
/// is either local path or some kind or URL.
///
/// Will return an error if the path is not valid.
fn resolve_uri_type(table_uri: impl AsRef<str>) -> DeltaResult<UriType> {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

basically pulled out from ensure_table_uri to it's own function

let table_uri = table_uri.as_ref();

if let Ok(url) = Url::parse(table_uri) {
if url.scheme() == "file" {
Ok(UriType::LocalPath(url.to_file_path().map_err(|err| {
let msg = format!("Invalid table location: {}\nError: {:?}", table_uri, err);
DeltaTableError::InvalidTableLocation(msg)
})?))
// NOTE this check is required to support absolute windows paths which may properly parse as url
} else if KNOWN_SCHEMES.contains(&url.scheme()) {
Ok(UriType::Url(url))
} else {
Ok(UriType::LocalPath(PathBuf::from(table_uri)))
}
} else {
Ok(UriType::LocalPath(PathBuf::from(table_uri)))
}
}

/// Attempt to create a Url from given table location.
///
/// The location could be:
Expand All @@ -405,25 +447,7 @@ lazy_static::lazy_static! {
pub fn ensure_table_uri(table_uri: impl AsRef<str>) -> DeltaResult<Url> {
let table_uri = table_uri.as_ref();

enum UriType {
LocalPath(PathBuf),
Url(Url),
}
let uri_type: UriType = if let Ok(url) = Url::parse(table_uri) {
if url.scheme() == "file" {
UriType::LocalPath(url.to_file_path().map_err(|err| {
let msg = format!("Invalid table location: {}\nError: {:?}", table_uri, err);
DeltaTableError::InvalidTableLocation(msg)
})?)
// NOTE this check is required to support absolute windows paths which may properly parse as url
} else if KNOWN_SCHEMES.contains(&url.scheme()) {
UriType::Url(url)
} else {
UriType::LocalPath(PathBuf::from(table_uri))
}
} else {
UriType::LocalPath(PathBuf::from(table_uri))
};
let uri_type: UriType = resolve_uri_type(table_uri)?;

// If it is a local path, we need to create it if it does not exist.
let mut url = match uri_type {
Expand Down Expand Up @@ -465,7 +489,7 @@ mod tests {

#[test]
fn test_ensure_table_uri() {
// parse an exisiting relative directory
// parse an existing relative directory
let uri = ensure_table_uri(".");
assert!(uri.is_ok());
let _uri = ensure_table_uri("./nonexistent");
Expand Down
Loading