Skip to content

Commit

Permalink
Adding _warmup_mmap_file function missing from MMapIndexedDataset
Browse files Browse the repository at this point in the history
_warmup_mmap_file function copied over from fairseq
  • Loading branch information
rdiehlmartinez committed Feb 1, 2024
1 parent e120750 commit 76eca24
Showing 1 changed file with 6 additions and 1 deletion.
7 changes: 6 additions & 1 deletion utils/mmap_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,11 @@ def index_file_path(prefix_path):
def data_file_path(prefix_path):
    """Return the data-file path for a dataset path prefix.

    Args:
        prefix_path: Path prefix of the dataset, as a string.

    Returns:
        ``prefix_path`` with the ``".bin"`` suffix appended.
    """
    return prefix_path + ".bin"

def _warmup_mmap_file(path):
with open(path, "rb") as stream:
while stream.read(100 * 1024 * 1024):
pass

class MMapIndexedDataset(torch.utils.data.Dataset):
class Index(object):
_HDR_MAGIC = b"MMIDIDX\x00\x00"
Expand Down Expand Up @@ -262,4 +267,4 @@ def supports_prefetch(self):
# Report whether a dataset is present at the prefix `path`: True only when
# both the file named by index_file_path(path) and the file named by
# data_file_path(path) exist on disk.
def exists(path):
return os.path.exists(index_file_path(path)) and os.path.exists(
data_file_path(path)
)
)

0 comments on commit 76eca24

Please sign in to comment.