From 6f661e5d03437ae06bf11e2f976b76abd4249407 Mon Sep 17 00:00:00 2001 From: Deepyaman Datta Date: Sun, 20 Sep 2020 19:12:24 -0400 Subject: [PATCH] Support passing arguments to underlying filesystem --- kedro/io/partitioned_data_set.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/kedro/io/partitioned_data_set.py b/kedro/io/partitioned_data_set.py index 7655c66359..b8f25e146b 100644 --- a/kedro/io/partitioned_data_set.py +++ b/kedro/io/partitioned_data_set.py @@ -98,6 +98,7 @@ def __init__( # pylint: disable=too-many-arguments filepath_arg: str = "filepath", filename_suffix: str = "", credentials: Dict[str, Any] = None, + fs_args: Dict[str, Any] = None, load_args: Dict[str, Any] = None, ): """Creates a new instance of ``PartitionedDataSet``. @@ -134,6 +135,8 @@ def __init__( # pylint: disable=too-many-arguments and should not be specified. All possible credentials management scenarios are documented here: https://kedro.readthedocs.io/en/stable/04_user_guide/08_advanced_io.html#partitioned-dataset-credentials + fs_args: Extra arguments to pass into underlying filesystem class constructor + (e.g. `{"project": "my-project"}` for ``GCSFileSystem``). load_args: Keyword arguments to be passed into ``find()`` method of the filesystem implementation. 
@@ -160,6 +163,8 @@ def __init__( # pylint: disable=too-many-arguments ) ) + self._fs_args = deepcopy(fs_args) or {} + self._credentials, dataset_credentials = _split_credentials(credentials) if dataset_credentials: if CREDENTIALS_KEY in self._dataset_config: @@ -189,7 +194,7 @@ def _filesystem(self): import fsspec # pylint: disable=import-outside-toplevel protocol = "s3" if self._protocol in S3_PROTOCOLS else self._protocol - return fsspec.filesystem(protocol, **self._credentials) + return fsspec.filesystem(protocol, **self._credentials, **self._fs_args) @property def _normalized_path(self) -> str: @@ -340,6 +345,7 @@ def __init__( filepath_arg: str = "filepath", filename_suffix: str = "", credentials: Dict[str, Any] = None, + fs_args: Dict[str, Any] = None, load_args: Dict[str, Any] = None, ): @@ -383,6 +389,8 @@ def __init__( credentials spec, then such spec will take precedence. All possible credentials management scenarios are documented here: https://kedro.readthedocs.io/en/stable/04_user_guide/08_advanced_io.html#partitioned-dataset-credentials + fs_args: Extra arguments to pass into underlying filesystem class constructor + (e.g. `{"project": "my-project"}` for ``GCSFileSystem``). load_args: Keyword arguments to be passed into ``find()`` method of the filesystem implementation. @@ -391,7 +399,13 @@ def __init__( """ super().__init__( - path, dataset, filepath_arg, filename_suffix, credentials, load_args + path, + dataset, + filepath_arg, + filename_suffix, + credentials, + fs_args, + load_args, ) self._checkpoint_config = self._parse_checkpoint_config(checkpoint)