diff --git a/configs/neox_arguments.md b/configs/neox_arguments.md index c88f90e5d..2d7794105 100644 --- a/configs/neox_arguments.md +++ b/configs/neox_arguments.md @@ -111,7 +111,7 @@ Logging Arguments - **git_hash**: str - Default = 8ebf7c6 + Default = 2ff807d current git hash of repository diff --git a/tools/datasets/corpora.py b/tools/datasets/corpora.py index 35977b908..9056b8f97 100644 --- a/tools/datasets/corpora.py +++ b/tools/datasets/corpora.py @@ -141,7 +141,7 @@ def tokenize(self): [os.path.join(parent_folder, os.path.basename(url)) for url in self.urls] ) - cmd = f"python tools/preprocess_data.py \ + cmd = f"python tools/datasets/preprocess_data.py \ --input {jsonl_filepath} \ --output-prefix {parent_folder}/{self.name} \ --vocab {self.vocab_file} \