
Commit 64cf3d9
Merge branch 'main_beta' into 'main'
Adam betas and eps

See merge request ADLR/megatron-lm!156
Mohammad Shoeybi committed Oct 14, 2020
2 parents 5753e8f + c55e154 commit 64cf3d9
Showing 2 changed files with 11 additions and 1 deletion.
9 changes: 9 additions & 0 deletions megatron/arguments.py
@@ -199,6 +199,15 @@ def _add_regularization_args(parser):
                        help='Weight decay coefficient for L2 regularization.')
     group.add_argument('--clip-grad', type=float, default=1.0,
                        help='Gradient clipping based on global L2 norm.')
+    group.add_argument('--adam-beta1', type=float, default=0.9,
+                       help='First coefficient for computing running averages of '
+                       'gradient and its square.')
+    group.add_argument('--adam-beta2', type=float, default=0.999,
+                       help='Second coefficient for computing running averages of '
+                       'gradient and its square.')
+    group.add_argument('--adam-eps', type=float, default=1e-08,
+                       help='Term added to the denominator to improve '
+                       'numerical stability.')
 
     return parser
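These three flags feed the betas and eps parameters of the optimizer constructed in megatron/training.py below. A minimal standalone sketch (not part of the commit; the parser setup and test values are illustrative) of how the new arguments parse:

    import argparse

    # Rebuild just the flags added in this diff.
    parser = argparse.ArgumentParser()
    group = parser.add_argument_group('regularization')
    group.add_argument('--adam-beta1', type=float, default=0.9)
    group.add_argument('--adam-beta2', type=float, default=0.999)
    group.add_argument('--adam-eps', type=float, default=1e-08)

    # argparse maps '--adam-beta1' to the attribute 'adam_beta1'.
    args = parser.parse_args(['--adam-beta2', '0.95'])
    print(args.adam_beta1, args.adam_beta2, args.adam_eps)  # -> 0.9 0.95 1e-08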
3 changes: 2 additions & 1 deletion megatron/training.py
@@ -164,7 +164,8 @@ def get_optimizer(model):
             param.model_parallel = False
 
     # Use Adam.
-    optimizer = Adam(param_groups, lr=args.lr, weight_decay=args.weight_decay)
+    optimizer = Adam(param_groups, lr=args.lr, weight_decay=args.weight_decay,
+                     betas=(args.adam_beta1, args.adam_beta2), eps=args.adam_eps)
 
     # Wrap into fp16 optimizer.
     if args.fp16:
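For reference, a self-contained sketch of the new optimizer call (an assumption for illustration: it uses torch.optim.Adam and a toy model, whereas the repository's Adam import may resolve to a fused implementation):

    import torch
    from torch.optim import Adam

    # Toy stand-ins for Megatron's model and parsed command-line arguments.
    model = torch.nn.Linear(16, 16)
    param_groups = model.parameters()

    optimizer = Adam(param_groups, lr=1.5e-4, weight_decay=0.01,
                     betas=(0.9, 0.999),  # (adam_beta1, adam_beta2)
                     eps=1e-08)           # adam_eps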
