Skip to content

Commit

Permalink
Add support for downloading a specific dataset split (train, test or all)
Browse files (browse the repository at this point in the history)
  • Loading branch information
quqixun committed Feb 2, 2023
1 parent 3377ff8 commit 862ed9c
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 10 deletions.
21 changes: 13 additions & 8 deletions download.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,15 +36,17 @@ def check_file_exist(filepath, filesize):
return False


def download_features(download_root=None, features_metadata_path=None, node='as'):
print('Downloading features ...')
def download_features(download_root=None, features_metadata_path=None, node='as', target_split='all'):
print(f'Downloading {target_split} features ...')

if download_root is None:
download_root = DOWNLOAD_ROOT_DIR
if features_metadata_path is None:
features_metadata_path = FEATURES_METADATA_PATH

features_metadata = pd.read_csv(features_metadata_path)
if target_split != 'all':
features_metadata = features_metadata.loc[features_metadata['split'] == target_split]
num_files = len(features_metadata)

s3_node = get_s3_node(node)
Expand Down Expand Up @@ -95,16 +97,19 @@ def download_training_labels(download_root=None, training_labels_metadata_path=N
parser.add_argument('--features_metadata', type=str, help='file path of metadata of features')
parser.add_argument('--training_labels_metadata', type=str, help='file path of metadata of training labels')
parser.add_argument('--s3_node', type=str, help='s3 node, us, eu or as')
parser.add_argument('--split', type=str, choices=['train', 'test', 'all'], help='dataset split, train, test or all')
args = parser.parse_args()

download_features(
download_root=args.download_root,
features_metadata_path=args.features_metadata,
node=args.s3_node
node=args.s3_node,
target_split=args.split
)

download_training_labels(
download_root=args.download_root,
training_labels_metadata_path=args.training_labels_metadata,
node=args.s3_node
)
if args.split != 'test':
download_training_labels(
download_root=args.download_root,
training_labels_metadata_path=args.training_labels_metadata,
node=args.s3_node
)
6 changes: 4 additions & 2 deletions scripts/download.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,14 @@


s3_node=as
split=test
download_root=./data/source
features_metadata=./data/information/features_metadata.csv
features_metadata=./data/information/features_metadata_FzP19JI.csv
training_labels_metadata=./data/information/train_agbm_metadata.csv

python download.py \
--download_root $download_root \
--features_metadata $features_metadata \
--training_labels_metadata $training_labels_metadata \
--s3_node $s3_node
--s3_node $s3_node \
--split $split

0 comments on commit 862ed9c

Please sign in to comment.