Skip to content

Commit

Permalink
Merge branch 'main' of https://github.com/EleutherAI/gpt-neox into main
Browse files Browse the repository at this point in the history
  • Loading branch information
kyriemao committed May 22, 2023
2 parents a315331 + 649c309 commit 7dfe6b0
Show file tree
Hide file tree
Showing 5 changed files with 526 additions and 20 deletions.
4 changes: 2 additions & 2 deletions configs/neox_arguments.md
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ Logging Arguments

- **git_hash**: str

-Default = 83e820c
+Default = a6b9622

current git hash of repository

Expand Down Expand Up @@ -587,7 +587,7 @@ Model Arguments



-- **output_layer_parallelism**: typing.Literal['row', 'column']
+- **output_layer_parallelism**: typing.Literal['column']

Default = column

Expand Down
1 change: 0 additions & 1 deletion configs/pythia/1-4B.yml
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,5 @@
"log_interval": 10,
"steps_per_print": 10,
"wall_clock_breakdown": true,

"tokenizer_type": "HFTokenizer"
}
32 changes: 16 additions & 16 deletions megatron/model/transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,22 +222,22 @@ def __init__(
skip_bias_add=False,
mup_rescale_parameters=is_last_layer, # rescale params only called if neox_args.use_mup = True, despite it not being included here
)
-else:
-print(
-'ERROR: Output layer parallelism over the hidden dim is currently broken (https://github.com/EleutherAI/gpt-neox/issues/905). Please run with output_layer_parallelism = "column" until this issue is fixed.'
-)
-exit()
-self.final_linear = mpu.RowParallelLinear(
-neox_args=neox_args,
-input_size=neox_args.hidden_size,
-output_size=neox_args.padded_vocab_size,
-bias=False,
-input_is_parallel=False,
-init_method=init_method,
-parallel_output=parallel_output,
-skip_bias_add=False,
-mup_rescale_parameters=is_last_layer, # only called if neox_args.use_mup = True, despite it not being included here
-)
+# else:
+# print(
+# 'ERROR: Output layer parallelism over the hidden dim is currently broken (https://github.com/EleutherAI/gpt-neox/issues/905). Please run with output_layer_parallelism = "column" until this issue is fixed.'
+# )
+# exit()
+# self.final_linear = mpu.RowParallelLinear(
+# neox_args=neox_args,
+# input_size=neox_args.hidden_size,
+# output_size=neox_args.padded_vocab_size,
+# bias=False,
+# input_is_parallel=False,
+# init_method=init_method,
+# parallel_output=parallel_output,
+# skip_bias_add=False,
+# mup_rescale_parameters=is_last_layer, # only called if neox_args.use_mup = True, despite it not being included here
+# )

def forward(self, hidden_states):
return self.final_linear(hidden_states)
Expand Down
2 changes: 1 addition & 1 deletion megatron/neox_arguments/neox_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -373,7 +373,7 @@ class NeoXArgsModel(NeoXArgsTemplate):
"""

# Output layer parallelism over the hidden dim is currently broken (https://github.com/EleutherAI/gpt-neox/issues/905)
-output_layer_parallelism: Literal["row", "column"] = "column"
+output_layer_parallelism: Literal["column"] = "column"

"""
Parameter controlling whether the output layer is parallelized over the hidden dim (row) or the vocab dim (column)
Expand Down
Loading

0 comments on commit 7dfe6b0

Please sign in to comment.