Skip to content

Commit

Permalink
Remove row parallelism (EleutherAI#946)
Browse files Browse the repository at this point in the history
* remove row parallelism

* Update NeoXArgs docs automatically

---------

Co-authored-by: Quentin-Anthony <[email protected]>
Co-authored-by: github-actions <[email protected]>
  • Loading branch information
3 people committed May 19, 2023
1 parent b70d004 commit 649c309
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 19 deletions.
4 changes: 2 additions & 2 deletions configs/neox_arguments.md
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ Logging Arguments

- **git_hash**: str

Default = 83e820c
Default = a6b9622

current git hash of repository

Expand Down Expand Up @@ -587,7 +587,7 @@ Model Arguments



- **output_layer_parallelism**: typing.Literal['row', 'column']
- **output_layer_parallelism**: typing.Literal['column']

Default = column

Expand Down
32 changes: 16 additions & 16 deletions megatron/model/transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,22 +222,22 @@ def __init__(
skip_bias_add=False,
mup_rescale_parameters=is_last_layer, # rescale params only called if neox_args.use_mup = True, despite it not being included here
)
else:
print(
'ERROR: Output layer parallelism over the hidden dim is currently broken (https://github.com/EleutherAI/gpt-neox/issues/905). Please run with output_layer_parallelism = "column" until this issue is fixed.'
)
exit()
self.final_linear = mpu.RowParallelLinear(
neox_args=neox_args,
input_size=neox_args.hidden_size,
output_size=neox_args.padded_vocab_size,
bias=False,
input_is_parallel=False,
init_method=init_method,
parallel_output=parallel_output,
skip_bias_add=False,
mup_rescale_parameters=is_last_layer, # only called if neox_args.use_mup = True, despite it not being included here
)
# else:
# print(
# 'ERROR: Output layer parallelism over the hidden dim is currently broken (https://github.com/EleutherAI/gpt-neox/issues/905). Please run with output_layer_parallelism = "column" until this issue is fixed.'
# )
# exit()
# self.final_linear = mpu.RowParallelLinear(
# neox_args=neox_args,
# input_size=neox_args.hidden_size,
# output_size=neox_args.padded_vocab_size,
# bias=False,
# input_is_parallel=False,
# init_method=init_method,
# parallel_output=parallel_output,
# skip_bias_add=False,
# mup_rescale_parameters=is_last_layer, # only called if neox_args.use_mup = True, despite it not being included here
# )

def forward(self, hidden_states):
    """Project *hidden_states* through the final output linear layer.

    Delegates entirely to ``self.final_linear`` (constructed in ``__init__``)
    and returns its result unchanged.
    """
    projection = self.final_linear
    return projection(hidden_states)
Expand Down
2 changes: 1 addition & 1 deletion megatron/neox_arguments/neox_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -373,7 +373,7 @@ class NeoXArgsModel(NeoXArgsTemplate):
"""

# Output layer parallelism over the hidden dim is currently broken (https://github.com/EleutherAI/gpt-neox/issues/905)
output_layer_parallelism: Literal["row", "column"] = "column"
output_layer_parallelism: Literal["column"] = "column"

"""
Parameter controlling whether the output layer is parallelized over the hidden dim (row) or the vocab dim (column)
Expand Down

0 comments on commit 649c309

Please sign in to comment.