-
Notifications
You must be signed in to change notification settings - Fork 5.6k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Train] Simplify llama 2 workspace template (#38444)
* Removed the need for the prepare_nodes step by doing the downloading as part of the training function.
* Added a script to create job submission YAMLs.
* Simplified the Ray dataset creation by reading the JSON file directly into a Ray dataset.

Signed-off-by: Kourosh Hakhamaneshi <[email protected]>
- Loading branch information
1 parent
0634a5c
commit ab06452
Showing
10 changed files
with
126 additions
and
156 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
2 changes: 2 additions & 0 deletions
2
doc/source/templates/04_finetuning_llms_with_deepspeed/compute_configs/aws_70b_v1.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,5 @@ | ||
region: us-west1 | ||
allowed_azs: [any] | ||
head_node_type: | ||
name: head_node_type | ||
instance_type: g5.48xlarge | ||
|
2 changes: 2 additions & 0 deletions
2
doc/source/templates/04_finetuning_llms_with_deepspeed/compute_configs/aws_70b_v2.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,5 @@ | ||
region: us-west1 | ||
allowed_azs: [any] | ||
head_node_type: | ||
name: head_node_type | ||
instance_type: g5.48xlarge | ||
|
2 changes: 2 additions & 0 deletions
2
doc/source/templates/04_finetuning_llms_with_deepspeed/compute_configs/aws_7b_or_13b.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
79 changes: 79 additions & 0 deletions
79
doc/source/templates/04_finetuning_llms_with_deepspeed/create_job_yaml.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
from argparse import ArgumentParser | ||
|
||
import yaml | ||
import os | ||
import pathlib | ||
|
||
|
||
def _parse_args(): | ||
parser = ArgumentParser() | ||
parser.add_argument( | ||
"--size", | ||
type=str, | ||
default="7b", | ||
choices=["7b", "13b", "70b"], | ||
help="Size of the model to train", | ||
) | ||
parser.add_argument( | ||
"--as-test", action="store_true", help="Whether to run in test mode" | ||
) | ||
parser.add_argument( | ||
"--max-retries", | ||
type=int, | ||
default=0, | ||
help="Number of times to retry the job if it fails", | ||
) | ||
parser.add_argument( | ||
"--output-path", | ||
type=str, | ||
default="./job.yaml", | ||
help="The path that job yaml should be stored.", | ||
) | ||
parser.add_argument("--compute-config", type=str, help="Path to the compute config") | ||
parser.add_argument( | ||
"--cluster-env-build-id", | ||
type=str, | ||
help="The build-id of the cluster env to use", | ||
) | ||
return parser.parse_args() | ||
|
||
|
||
def main():
    """Build a job config from the CLI options and write it out as YAML.

    The config contains the job name, the entrypoint command (which runs
    ``run_llama_ft.sh`` for the requested model size), the retry count and,
    when the matching options are given, the compute config and the cluster
    env build id. After writing the file, the ``anyscale job submit`` command
    needed to submit it is printed.

    Raises:
        KeyError: If ``--compute-config`` is given but the
            ``ANYSCALE_CLOUD_ID`` environment variable is not set.
    """
    pargs = _parse_args()

    # Resolve compute config
    compute_config_kwargs = {}
    if pargs.compute_config:
        with open(pargs.compute_config, "r", encoding="utf-8") as f:
            compute_config = yaml.safe_load(f)
        # An empty YAML document loads as None; treat it as an empty mapping
        # instead of crashing on the assignment below.
        if compute_config is None:
            compute_config = {}
        try:
            cloud_id = os.environ["ANYSCALE_CLOUD_ID"]
        except KeyError as err:
            raise KeyError(
                "ANYSCALE_CLOUD_ID must be set in the environment when "
                "--compute-config is used"
            ) from err
        compute_config["cloud_id"] = cloud_id
        compute_config_kwargs["compute_config"] = compute_config

    # Resolve cluster env config
    cluster_env_config_kwargs = {}
    if pargs.cluster_env_build_id:
        cluster_env_config_kwargs["build_id"] = pargs.cluster_env_build_id

    base_cmd = f"chmod +x ./run_llama_ft.sh && ./run_llama_ft.sh --size={pargs.size}"
    job_config = {
        "name": f"llama-2-{pargs.size}",
        "entrypoint": base_cmd + (" --as-test" if pargs.as_test else ""),
        "max_retries": pargs.max_retries,
        **compute_config_kwargs,
        **cluster_env_config_kwargs,
    }

    # Make sure the destination directory exists before writing the file.
    pathlib.Path(os.path.dirname(pargs.output_path)).mkdir(parents=True, exist_ok=True)
    with open(pargs.output_path, "w") as f:
        yaml.safe_dump(job_config, f)
    # A single f-string avoids the double space that print's separator added.
    print(f"Job config written to {pargs.output_path}")
    print("To submit the job, run:")
    print(f"anyscale job submit {pargs.output_path}")
||
|
||
# Only generate the job YAML when executed as a script, not when imported.
if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
73 changes: 0 additions & 73 deletions
73
doc/source/templates/04_finetuning_llms_with_deepspeed/prepare_nodes.py
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
49 changes: 0 additions & 49 deletions
49
doc/source/templates/04_finetuning_llms_with_deepspeed/run_on_every_node.py
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters