Skip to content

Commit

Permalink
Fix typo
Browse files: browse the repository at this point in the history
  • Loading branch information
leogao2 committed Nov 10, 2020
1 parent 71e3a1c commit 07edce4
Showing 1 changed file with 3 additions and 3 deletions.
6 changes: 3 additions & 3 deletions the_pile/pile.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,9 +315,9 @@ def lang_stats(pile):
def sample_from_sets(datasets, n_docs):
random.seed(42)
for dset, _ in datasets:
- fname = 'dataset_samples/{}.json'.format(dset.name().replace(' ', '_')
+ fname = 'dataset_samples/{}.json'.format(dset.name().replace(' ', '_'))
if os.path.exists(fname): continue

n = dset.num_docs()

# hotfix: github is the only dataset in v1 that's run for less than one epoch, so we only look at the part of it that actually ended up in v1.
Expand All @@ -337,7 +337,7 @@ def sample_from_sets(datasets, n_docs):
except:
pass

- with open(fname), 'w') as fh:
+ with open(fname, 'w') as fh:
json.dump(docs, fh)

pbar.close()
Expand Down

0 comments on commit 07edce4

Please sign in to comment.