Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support passing arguments to underlying filesystem #522

Closed
wants to merge 3 commits into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 16 additions & 2 deletions kedro/io/partitioned_data_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ def __init__( # pylint: disable=too-many-arguments
filepath_arg: str = "filepath",
filename_suffix: str = "",
credentials: Dict[str, Any] = None,
fs_args: Dict[str, Any] = None,
load_args: Dict[str, Any] = None,
):
"""Creates a new instance of ``PartitionedDataSet``.
Expand Down Expand Up @@ -134,6 +135,8 @@ def __init__( # pylint: disable=too-many-arguments
and should not be specified.
All possible credentials management scenarios are documented here:
https://kedro.readthedocs.io/en/stable/04_user_guide/08_advanced_io.html#partitioned-dataset-credentials
fs_args: Extra arguments to pass into underlying filesystem class constructor
(e.g. `{"project": "my-project"}` for ``GCSFileSystem``)
load_args: Keyword arguments to be passed into ``find()`` method of
the filesystem implementation.

Expand All @@ -160,6 +163,8 @@ def __init__( # pylint: disable=too-many-arguments
)
)

self._fs_args = deepcopy(fs_args) or {}

self._credentials, dataset_credentials = _split_credentials(credentials)
if dataset_credentials:
if CREDENTIALS_KEY in self._dataset_config:
Expand Down Expand Up @@ -189,7 +194,7 @@ def _filesystem(self):
import fsspec # pylint: disable=import-outside-toplevel

protocol = "s3" if self._protocol in S3_PROTOCOLS else self._protocol
return fsspec.filesystem(protocol, **self._credentials)
return fsspec.filesystem(protocol, **self._credentials, **self._fs_args)

@property
def _normalized_path(self) -> str:
Expand Down Expand Up @@ -340,6 +345,7 @@ def __init__(
filepath_arg: str = "filepath",
filename_suffix: str = "",
credentials: Dict[str, Any] = None,
fs_args: Dict[str, Any] = None,
load_args: Dict[str, Any] = None,
):

Expand Down Expand Up @@ -383,6 +389,8 @@ def __init__(
credentials spec, then such spec will take precedence.
All possible credentials management scenarios are documented here:
https://kedro.readthedocs.io/en/stable/04_user_guide/08_advanced_io.html#partitioned-dataset-credentials
fs_args: Extra arguments to pass into underlying filesystem class constructor
(e.g. `{"project": "my-project"}` for ``GCSFileSystem``).
load_args: Keyword arguments to be passed into ``find()`` method of
the filesystem implementation.

Expand All @@ -391,7 +399,13 @@ def __init__(
"""

super().__init__(
path, dataset, filepath_arg, filename_suffix, credentials, load_args
path,
dataset,
filepath_arg,
filename_suffix,
credentials,
fs_args,
load_args,
)

self._checkpoint_config = self._parse_checkpoint_config(checkpoint)
Expand Down