Commit
update default config values
sdtblck committed Apr 25, 2021
1 parent 39ded94 commit 207caf6
Showing 9 changed files with 66 additions and 44 deletions.
4 changes: 3 additions & 1 deletion .gitignore
@@ -134,4 +134,6 @@ wandb/
 # data files
 data/
 checkpoints/
-.vscode/
+.vscode/
+*.pt
+*.ckpt
14 changes: 8 additions & 6 deletions configs/13B.yml
@@ -11,19 +11,21 @@
 "num-attention-heads": 40,
 "seq-length": 2048,
 "max-position-embeddings": 2048,
-"norm": "rmsnorm",
-"pos-emb": "none",
+"norm": "layernorm",
+"pos-emb": "rotary",
 "no-weight-tying": true,

-# this should provide some speedup but takes a while to build, set to true if desired
+# these should provide some speedup but takes a while to build, set to true if desired
 "scaled-upper-triang-masked-softmax-fusion": false,

 "bias-gelu-fusion": false,

 # optimizer settings
 "optimizer": {
   "type": "Adam",
   "params": {
     "lr": 0.0001,
     "max_grad_norm": 1.0,
-    "betas": [0.9, 0.999]
+    "betas": [0.9, 0.999],
+    "eps": 1.0e-8,
   }
 },
 "zero_optimization": {
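
Aside: the default "pos-emb" above flips from "none" to "rotary". As a rough illustration of what rotary position embeddings (RoPE) compute, here is a minimal NumPy sketch; it is illustrative only, not the gpt-neox implementation, and apply_rotary is an invented name:

import numpy as np

def apply_rotary(x, base=10000):
    # x: (seq_len, dim) slice of queries or keys, dim even. Each channel pair
    # is rotated by a position-dependent angle, so attention dot products
    # come to depend on relative position.
    seq_len, dim = x.shape
    half = dim // 2
    inv_freq = base ** (-np.arange(half) / half)     # per-pair frequencies
    angles = np.outer(np.arange(seq_len), inv_freq)  # (seq_len, half)
    cos, sin = np.cos(angles), np.sin(angles)
    x1, x2 = x[:, :half], x[:, half:]
    return np.concatenate([x1 * cos - x2 * sin,
                           x1 * sin + x2 * cos], axis=-1)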
14 changes: 8 additions & 6 deletions configs/175B.yml
@@ -11,19 +11,21 @@
 "num-attention-heads": 96,
 "seq-length": 2048,
 "max-position-embeddings": 2048,
-"norm": "rmsnorm",
-"pos-emb": "none",
+"norm": "layernorm",
+"pos-emb": "rotary",
 "no-weight-tying": true,

-# this should provide some speedup but takes a while to build, set to true if desired
+# these should provide some speedup but takes a while to build, set to true if desired
 "scaled-upper-triang-masked-softmax-fusion": false,

 "bias-gelu-fusion": false,

 # optimizer settings
 "optimizer": {
   "type": "Adam",
   "params": {
     "lr": 0.00006,
     "max_grad_norm": 1.0,
-    "betas": [0.9, 0.999]
+    "betas": [0.9, 0.999],
+    "eps": 1.0e-8,
   }
 },
 "zero_optimization": {
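
Aside: the default "norm" flips from "rmsnorm" to "layernorm" in every config. The two differ only in whether activations are mean-centered and given a bias. A minimal NumPy sketch under the usual textbook definitions (the eps values here are illustrative, not taken from this repo):

import numpy as np

def layer_norm(x, gain, bias, eps=1e-5):
    # Center and rescale along the feature axis, then apply learned affine.
    mu = x.mean(axis=-1, keepdims=True)
    var = x.var(axis=-1, keepdims=True)
    return gain * (x - mu) / np.sqrt(var + eps) + bias

def rms_norm(x, gain, eps=1e-8):
    # No centering and no bias: rescale by the root mean square alone.
    rms = np.sqrt((x * x).mean(axis=-1, keepdims=True) + eps)
    return gain * x / rms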
13 changes: 8 additions & 5 deletions configs/2-7B.yml
@@ -11,19 +11,22 @@
 "num-attention-heads": 32,
 "seq-length": 2048,
 "max-position-embeddings": 2048,
-"norm": "rmsnorm",
-"pos-emb": "none",
+"norm": "layernorm",
+"pos-emb": "rotary",
 "no-weight-tying": true,

-# this should provide some speedup but takes a while to build, set to true if desired
+# these should provide some speedup but takes a while to build, set to true if desired
 "scaled-upper-triang-masked-softmax-fusion": false,
 "bias-gelu-fusion": false,

 # optimizer settings
 "optimizer": {
   "type": "Adam",
   "params": {
     "lr": 0.00016,
     "max_grad_norm": 1.0,
-    "betas": [0.9, 0.999]
+    "betas": [0.9, 0.999],
+    "eps": 1.0e-8,
   }
 },
 "zero_optimization": {
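
Aside: each optimizer block gains an explicit "eps": 1.0e-8 next to "betas". A schematic NumPy version of a single Adam step shows where these parameters enter (not DeepSpeed's fused optimizer; adam_step is an invented name). eps guards the division when the second-moment estimate is near zero, and the existing "max_grad_norm": 1.0 corresponds to clipping the global gradient norm before a step like this is taken.

import numpy as np

def adam_step(param, grad, m, v, t, lr=0.00016, betas=(0.9, 0.999), eps=1e-8):
    # lr default mirrors the 2-7B config above; t is the 1-based step count.
    b1, b2 = betas
    m = b1 * m + (1 - b1) * grad               # first-moment EMA
    v = b2 * v + (1 - b2) * grad * grad        # second-moment EMA
    m_hat = m / (1 - b1 ** t)                  # bias correction
    v_hat = v / (1 - b2 ** t)
    param = param - lr * m_hat / (np.sqrt(v_hat) + eps)
    return param, m, v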
13 changes: 8 additions & 5 deletions configs/6-7B.yml
@@ -11,19 +11,22 @@
 "num-attention-heads": 32,
 "seq-length": 2048,
 "max-position-embeddings": 2048,
-"norm": "rmsnorm",
-"pos-emb": "none",
+"norm": "layernorm",
+"pos-emb": "rotary",
 "no-weight-tying": true,

-# this should provide some speedup but takes a while to build, set to true if desired
+# these should provide some speedup but takes a while to build, set to true if desired
 "scaled-upper-triang-masked-softmax-fusion": false,
 "bias-gelu-fusion": false,

 # optimizer settings
 "optimizer": {
   "type": "Adam",
   "params": {
     "lr": 0.00012,
     "max_grad_norm": 1.0,
-    "betas": [0.9, 0.999]
+    "betas": [0.9, 0.999],
+    "eps": 1.0e-8,
   }
 },
 "zero_optimization": {
13 changes: 7 additions & 6 deletions configs/XL.yml
@@ -11,20 +11,21 @@
 "num-attention-heads": 16,
 "seq-length": 2048,
 "max-position-embeddings": 2048,
-"norm": "rmsnorm",
-"pos-emb": "none",
+"norm": "layernorm",
+"pos-emb": "rotary",
 "no-weight-tying": true,

-# this should provide some speedup but takes a while to build, set to true if desired
+# these should provide some speedup but takes a while to build, set to true if desired
 "scaled-upper-triang-masked-softmax-fusion": false,
 "bias-gelu-fusion": false,

 # optimizer settings
 "optimizer": {
   "type": "Adam",
   "params": {
     "lr": 0.0002,
     "max_grad_norm": 1.0,
-    "betas": [0.9, 0.999]
+    "betas": [0.9, 0.999],
+    "eps": 1.0e-8,
   }
 },
 "zero_optimization": {
13 changes: 8 additions & 5 deletions configs/large.yml
@@ -11,19 +11,22 @@
 "num-attention-heads": 16,
 "seq-length": 2048,
 "max-position-embeddings": 2048,
-"norm": "rmsnorm",
-"pos-emb": "none",
+"norm": "layernorm",
+"pos-emb": "rotary",
 "no-weight-tying": true,

-# this should provide some speedup but takes a while to build, set to true if desired
+# these should provide some speedup but takes a while to build, set to true if desired
 "scaled-upper-triang-masked-softmax-fusion": false,
 "bias-gelu-fusion": false,

 # optimizer settings
 "optimizer": {
   "type": "Adam",
   "params": {
     "lr": 0.00025,
     "max_grad_norm": 1.0,
-    "betas": [0.9, 0.999]
+    "betas": [0.9, 0.999],
+    "eps": 1.0e-8,
   }
 },
 "zero_optimization": {
13 changes: 8 additions & 5 deletions configs/medium.yml
@@ -11,20 +11,23 @@
 "num-attention-heads": 16,
 "seq-length": 2048,
 "max-position-embeddings": 2048,
-"norm": "rmsnorm",
-"pos-emb": "none",
+"norm": "layernorm",
+"pos-emb": "rotary",
 "no-weight-tying": true,

-# this should provide some speedup but takes a while to build, set to true if desired
+# these should provide some speedup but takes a while to build, set to true if desired
 "scaled-upper-triang-masked-softmax-fusion": false,
 "bias-gelu-fusion": false,

 # optimizer settings
 "optimizer": {
   "type": "Adam",
   "params": {
     "lr": 0.0003,
     "max_grad_norm": 1.0,
-    "betas": [0.9, 0.999]
+    "betas": [0.9, 0.999],
+    "eps": 1.0e-8,
   }
 },
 "zero_optimization": {
13 changes: 8 additions & 5 deletions configs/small.yml
@@ -11,19 +11,22 @@
 "num-attention-heads": 12,
 "seq-length": 2048,
 "max-position-embeddings": 2048,
-"norm": "rmsnorm",
-"pos-emb": "none",
+"norm": "layernorm",
+"pos-emb": "rotary",
 "no-weight-tying": true,

-# this should provide some speedup but takes a while to build, set to true if desired
+# these should provide some speedup but takes a while to build, set to true if desired
 "scaled-upper-triang-masked-softmax-fusion": false,
 "bias-gelu-fusion": false,

 # optimizer settings
 "optimizer": {
   "type": "Adam",
   "params": {
     "lr": 0.0006,
     "max_grad_norm": 1.0,
-    "betas": [0.9, 0.999]
+    "betas": [0.9, 0.999],
+    "eps": 1.0e-8,
   }
 },
 "zero_optimization": {
