Skip to content

Commit

Permalink
Merge pull request #24 from EleutherAI/data_fix
Browse files Browse the repository at this point in the history
make index files on all local_rank_0s
  • Loading branch information
StellaAthena committed Feb 10, 2021
2 parents 626c9a5 + d448001 commit 8293398
Showing 1 changed file with 1 addition and 1 deletion.
2 changes: 1 addition & 1 deletion megatron/data/gpt2_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ def _build_index_mappings(name, data_prefix, documents, sizes,
shuffle_idx_filename = _filename + '_shuffle_idx.npy'

# Build the index mappings if they do not already exist.
if torch.distributed.get_rank() == 0:
if torch.distributed.get_rank() == 0 or int(os.environ["LOCAL_RANK"]) == 0:
if (not os.path.isfile(doc_idx_filename)) or \
(not os.path.isfile(sample_idx_filename)) or \
(not os.path.isfile(shuffle_idx_filename)):
Expand Down

0 comments on commit 8293398

Please sign in to comment.