Deprecate cpu_offload zero_optimization Option (#720)
* Deprecate cpu_offload zero_optimization option

* Update NeoXArgs docs automatically

Co-authored-by: github-actions <[email protected]>
Quentin-Anthony and github-actions committed Nov 25, 2022
1 parent 46b7d82 commit ed09eba
Showing 19 changed files with 27 additions and 21 deletions.
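
Background for this diff: the boolean `"cpu_offload"` key inside `zero_optimization` is deprecated upstream in DeepSpeed, which superseded it with the structured `offload_optimizer` sub-config. None of the configs below ever enabled it (every removed line set it to `False`), so the commit drops the stale key and adds a pointer to the upstream option list instead. For anyone who does want CPU offload after this change, a minimal sketch in the style of these configs (the keys are DeepSpeed's newer offload options, not part of this commit; assumes a recent DeepSpeed and is typically used with ZeRO stage 2 or higher):

```yaml
# Sketch only, not part of this diff: optimizer CPU offload via the newer
# DeepSpeed "offload_optimizer" sub-config, which supersedes the removed
# boolean "cpu_offload" flag.
"zero_optimization": {
  "stage": 2,
  "offload_optimizer": {
    "device": "cpu",    # keep optimizer states in host memory
    "pin_memory": true  # assumption: pinned buffers for faster host<->device copies
  }
},
```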
configs/1-3B.yml (2 additions, 2 deletions)

@@ -35,7 +35,8 @@
 }
 },
 "min_lr": 0.00002,
-
+
+# for all zero_optimization options, see https://www.deepspeed.ai/docs/config-json/#zero-optimizations-for-fp16-training
 "zero_optimization": {
 "stage": 1,
 "allgather_partitions": True,
@@ -44,7 +45,6 @@
 "reduce_scatter": True,
 "reduce_bucket_size": 500000000,
 "contiguous_gradients": True,
-"cpu_offload": False
 },
 
 # batch / data settings
configs/125M.yml (1 addition, 1 deletion)

@@ -37,6 +37,7 @@
 },
 "min_lr": 0.00006,
 
+# for all zero_optimization options, see https://www.deepspeed.ai/docs/config-json/#zero-optimizations-for-fp16-training
 "zero_optimization": {
 "stage": 1,
 "allgather_partitions": True,
@@ -45,7 +46,6 @@
 "reduce_scatter": True,
 "reduce_bucket_size": 500000000,
 "contiguous_gradients": True,
-"cpu_offload": False
 },
 
 # batch / data settings
configs/13B.yml (2 additions, 1 deletion)

@@ -35,6 +35,8 @@
 "eps": 1.0e-8,
 }
 },
+
+# for all zero_optimization options, see https://www.deepspeed.ai/docs/config-json/#zero-optimizations-for-fp16-training
 "zero_optimization": {
 "stage": 1,
 "allgather_partitions": True,
@@ -43,7 +45,6 @@
 "reduce_scatter": True,
 "reduce_bucket_size": 500000000,
 "contiguous_gradients": True,
-"cpu_offload": False
 },
 "min_lr": 0.00001,
 
configs/175B.yml (1 addition, 1 deletion)

@@ -35,6 +35,7 @@
 }
 },
 "min_lr": 0.000006,
+# for all zero_optimization options, see https://www.deepspeed.ai/docs/config-json/#zero-optimizations-for-fp16-training
 "zero_optimization": {
 "stage": 1,
 "allgather_partitions": True,
@@ -43,7 +44,6 @@
 "reduce_scatter": True,
 "reduce_bucket_size": 500000000,
 "contiguous_gradients": True,
-"cpu_offload": False
 },
 
 # batch / data settings
configs/19M.yml (1 addition, 1 deletion)

@@ -30,6 +30,7 @@
 },
 "min_lr": 0.0001,
 
+# for all zero_optimization options, see https://www.deepspeed.ai/docs/config-json/#zero-optimizations-for-fp16-training
 "zero_optimization": {
 "stage": 1,
 "allgather_partitions": True,
@@ -38,7 +39,6 @@
 "reduce_scatter": True,
 "reduce_bucket_size": 500000000,
 "contiguous_gradients": True,
-"cpu_offload": False
 },
 
 "train_micro_batch_size_per_gpu": 4, #32,
configs/2-7B.yml (2 additions, 1 deletion)

@@ -35,6 +35,8 @@
 }
 },
 "min_lr": 0.000016,
+
+# for all zero_optimization options, see https://www.deepspeed.ai/docs/config-json/#zero-optimizations-for-fp16-training
 "zero_optimization": {
 "stage": 1,
 "allgather_partitions": True,
@@ -43,7 +45,6 @@
 "reduce_scatter": True,
 "reduce_bucket_size": 500000000,
 "contiguous_gradients": True,
-"cpu_offload": False
 },
 
 # batch / data settings
configs/20B.yml (2 additions, 1 deletion)

@@ -46,6 +46,8 @@
 },
 
 "min_lr": 0.97e-5,
+
+# for all zero_optimization options, see https://www.deepspeed.ai/docs/config-json/#zero-optimizations-for-fp16-training
 "zero_optimization": {
 "stage": 1,
 "allgather_partitions": True,
@@ -54,7 +56,6 @@
 "reduce_scatter": True,
 "reduce_bucket_size": 1260000000,
 "contiguous_gradients": True,
-"cpu_offload": False
 },
 
 # batch / data settings (assuming 96 GPUs)
configs/350M.yml (2 additions, 1 deletion)

@@ -35,6 +35,8 @@
 }
 },
 "min_lr": 0.00003,
+
+# for all zero_optimization options, see https://www.deepspeed.ai/docs/config-json/#zero-optimizations-for-fp16-training
 "zero_optimization": {
 "stage": 1,
 "allgather_partitions": True,
@@ -43,7 +45,6 @@
 "reduce_scatter": True,
 "reduce_bucket_size": 500000000,
 "contiguous_gradients": True,
-"cpu_offload": False
 },
 # batch / data settings
 "train_micro_batch_size_per_gpu": 4,
configs/49M.yml (1 addition, 1 deletion)

@@ -34,6 +34,7 @@
 },
 "min_lr": 0.00008,
 
+# for all zero_optimization options, see https://www.deepspeed.ai/docs/config-json/#zero-optimizations-for-fp16-training
 "zero_optimization": {
 "stage": 1,
 "allgather_partitions": True,
@@ -42,7 +43,6 @@
 "reduce_scatter": True,
 "reduce_bucket_size": 500000000,
 "contiguous_gradients": True,
-"cpu_offload": False
 },
 
 # batch / data settings
configs/6-7B.yml (2 additions, 1 deletion)

@@ -34,6 +34,8 @@
 "eps": 1.0e-8,
 }
 },
+
+# for all zero_optimization options, see https://www.deepspeed.ai/docs/config-json/#zero-optimizations-for-fp16-training
 "zero_optimization": {
 "stage": 1,
 "allgather_partitions": True,
@@ -42,7 +44,6 @@
 "reduce_scatter": True,
 "reduce_bucket_size": 500000000,
 "contiguous_gradients": True,
-"cpu_offload": False
 },
 "min_lr": 0.000012,
 
configs/760M.yml (2 additions, 1 deletion)

@@ -35,6 +35,8 @@
 }
 },
 "min_lr": 0.000025,
+
+# for all zero_optimization options, see https://www.deepspeed.ai/docs/config-json/#zero-optimizations-for-fp16-training
 "zero_optimization": {
 "stage": 1,
 "allgather_partitions": True,
@@ -43,7 +45,6 @@
 "reduce_scatter": True,
 "reduce_bucket_size": 500000000,
 "contiguous_gradients": True,
-"cpu_offload": False
 },
 
 # batch / data settings
configs/800M.yml (1 addition, 1 deletion)

@@ -30,6 +30,7 @@
 },
 "min_lr": 0.000025,
 
+# for all zero_optimization options, see https://www.deepspeed.ai/docs/config-json/#zero-optimizations-for-fp16-training
 "zero_optimization": {
 "stage": 1,
 "allgather_partitions": True,
@@ -38,7 +39,6 @@
 "reduce_scatter": True,
 "reduce_bucket_size": 500000000,
 "contiguous_gradients": True,
-"cpu_offload": False
 },
 
 "train_micro_batch_size_per_gpu": 16,
configs/README.md (2 additions, 2 deletions)

@@ -38,6 +38,7 @@ For a detailed list of all the arguments available for neox, see [neox_arguments
 "betas": [0.9, 0.95]
 }
 },
+# for all zero_optimization options, see https://www.deepspeed.ai/docs/config-json/#zero-optimizations-for-fp16-training
 "zero_optimization": {
 "stage": 0,
 "allgather_partitions": True,
@@ -46,7 +47,6 @@ For a detailed list of all the arguments available for neox, see [neox_arguments
 "reduce_scatter": True,
 "reduce_bucket_size": 500000000,
 "contiguous_gradients": True,
-"cpu_offload": False
 },
 
 # batch / data settings
@@ -165,6 +165,7 @@ Available optimizer types are:
 ### ZeRO Optimization:
 
 ```yaml
+# for all zero_optimization options, see https://www.deepspeed.ai/docs/config-json/#zero-optimizations-for-fp16-training
 "zero_optimization": {
 "stage": 0,
 "allgather_partitions": True,
@@ -173,7 +174,6 @@ Available optimizer types are:
 "reduce_scatter": True,
 "reduce_bucket_size": 500000000,
 "contiguous_gradients": True,
-"cpu_offload": False
 },
 "zero_allow_untested_optimizer": false,
 
configs/small_bf16.yml → configs/bf16_125M.yml (renamed; 1 addition, 1 deletion)

@@ -29,6 +29,7 @@
 "eps": 1.0e-8,
 }
 },
+# for all zero_optimization options, see https://www.deepspeed.ai/docs/config-json/#zero-optimizations-for-fp16-training
 "zero_optimization": {
 "stage": 0,
 "allgather_partitions": True,
@@ -37,7 +38,6 @@
 "reduce_scatter": True,
 "reduce_bucket_size": 500000000,
 "contiguous_gradients": True,
-"cpu_offload": False
 },
 
 # batch / data settings
configs/bnb_small.yml → configs/bnb_125M.yml (renamed; 1 addition, 1 deletion)

@@ -30,6 +30,7 @@
 "eps": 1.0e-8,
 }
 },
+# for all zero_optimization options, see https://www.deepspeed.ai/docs/config-json/#zero-optimizations-for-fp16-training
 "zero_optimization": {
 "stage": 0,
 "allgather_partitions": True,
@@ -38,7 +39,6 @@
 "reduce_scatter": True,
 "reduce_bucket_size": 500000000,
 "contiguous_gradients": True,
-"cpu_offload": False
 },
 
 # batch / data settings
configs/neox_arguments.md (1 addition, 1 deletion)

@@ -111,7 +111,7 @@ Logging Arguments
 
 - **git_hash**: str
 
-    Default = 32deba2
+    Default = 62c2de8
 
     current git hash of repository
 
configs/slurm_small.yml → configs/slurm_125M.yml (renamed; 1 addition, 1 deletion)

@@ -19,6 +19,7 @@
 "eps": 1.0e-8
 }
 },
+# for all zero_optimization options, see https://www.deepspeed.ai/docs/config-json/#zero-optimizations-for-fp16-training
 "zero_optimization": {
 "stage": 0,
 "allgather_partitions": true,
@@ -27,7 +28,6 @@
 "reduce_scatter": true,
 "reduce_bucket_size": 500000000,
 "contiguous_gradients": true,
-"cpu_offload": false
 },
 "train_micro_batch_size_per_gpu": 4,
 "data-impl": "mmap",
megatron/neox_arguments/arguments.py (1 addition, 1 deletion)

@@ -36,6 +36,7 @@
 
 # ZERO defaults by deespeed
 # These values should not be changed unless defaults in deepspeed are changed
+# for all zero_optimization options, see https://www.deepspeed.ai/docs/config-json/#zero-optimizations-for-fp16-training
 ZERO_DEFAULTS = {
 "stage": 0,
 "allgather_partitions": True,
@@ -45,7 +46,6 @@
 "reduce_scatter": True,
 "reduce_bucket_size": int(5e8),
 "contiguous_gradients": False,
-"cpu_offload": False,
 }
 
 # NeoX optimizer defaults
tests/test_configs/test_train_base.yml (1 addition, 1 deletion)

@@ -29,6 +29,7 @@
 }
 },
 
+# for all zero_optimization options, see https://www.deepspeed.ai/docs/config-json/#zero-optimizations-for-fp16-training
 "zero_optimization": {
 "stage": 0,
 "allgather_partitions": True,
@@ -37,7 +38,6 @@
 "reduce_scatter": True,
 "reduce_bucket_size": 500000000,
 "contiguous_gradients": True,
-"cpu_offload": False
 },
 
 # batch / data settings
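
Taken together, the migration for any local config that still carries the old flag is a one-line deletion; the surrounding block is otherwise untouched. As a sketch showing only the keys visible in this diff (each hunk elides a few lines, which may hold further settings), a stage-1 block now reads:

```yaml
# Post-commit shape of the block: the comment line is new, the
# "cpu_offload" key is gone, and the remaining visible settings are unchanged.
# for all zero_optimization options, see https://www.deepspeed.ai/docs/config-json/#zero-optimizations-for-fp16-training
"zero_optimization": {
  "stage": 1,
  "allgather_partitions": True,
  "reduce_scatter": True,
  "reduce_bucket_size": 500000000,
  "contiguous_gradients": True,
},
```

If a stale `"cpu_offload"` entry is left in place, behavior depends on the argument validation in `megatron/neox_arguments/arguments.py`; since the key is no longer in `ZERO_DEFAULTS`, it is safest to assume unrecognized keys will be rejected and simply delete the line.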
