Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Deepspeed benchmarking #878

Draft
wants to merge 24 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
f4706e0
add flash_attn_kvpacked
satpalsr Mar 29, 2023
f4a9106
Changed is_pipe_parallel setting to fix pipeline-parallel inference
curt-tigges Mar 31, 2023
83a7b9a
Update NeoXArgs docs automatically
invalid-email-address Mar 31, 2023
45d7052
fix formatting
satpalsr Apr 11, 2023
857c556
gpt benchmark script
cr458 Apr 3, 2023
1ab5bf3
remove duplicate argparse
cr458 Apr 4, 2023
afb6b29
HF inference
cr458 Apr 4, 2023
3f7d605
benchmarking configs + script changes
cr458 Apr 11, 2023
d99d2ce
plot directly, runs deepspeed and hf for single benchmark
cr458 Apr 12, 2023
b0e9745
remove plotting comments
cr458 Apr 12, 2023
9c645dd
accept changes from main & resolve conflicts
satpalsr Apr 15, 2023
ee99945
Merge branch 'main' into flash_attn_infer
satpalsr Apr 15, 2023
9b1733e
tmp changes
cr458 Apr 17, 2023
22cac56
Merge remote-tracking branch 'satpalsr/flash_attn_infer' into deepspe…
cr458 Apr 17, 2023
466749b
merge conflict git hash
cr458 Apr 17, 2023
b10739f
separate scripts for Deepspeed/HF and neox
cr458 Apr 18, 2023
4990f9b
debugging: works when world size > 1 but not otherwise
cr458 Apr 18, 2023
88981b2
working ( but not serially)
cr458 Apr 19, 2023
5e3ca7f
working ish gpt-neox just need to figure out how to get dataframe back
cr458 Apr 20, 2023
3ee9d3b
get dataframe output from stdout
cr458 Apr 20, 2023
2a6e8cd
remove gpt neox inference from script
cr458 May 21, 2023
7ea22d9
remove lines
cr458 May 21, 2023
ef4fdd4
device error
cr458 May 21, 2023
d8184f3
Add DS inference
satpalsr May 22, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
fix formatting
  • Loading branch information
satpalsr committed Apr 11, 2023
commit 45d7052e8372105f0f179027978c1eb7a17b04a5
7 changes: 3 additions & 4 deletions configs/neox_arguments.md
Original file line number Diff line number Diff line change
Expand Up @@ -926,7 +926,7 @@ Text Generation arguments

- **prompt_end**: str

Default =
Default =


Marks the end of a single prompt. Defaults to newline.
Expand Down Expand Up @@ -968,7 +968,7 @@ Text Generation arguments

- **eval_results_prefix**: str

Default =
Default =

Prefix under which to save evaluation results - the final file path will be {eval_results_prefix}_eval_results_yy-mm-dd-HH-MM.json

Expand Down Expand Up @@ -1686,7 +1686,7 @@ Args for deepspeed config

Default = None





Expand Down Expand Up @@ -1980,4 +1980,3 @@ Args for deepspeed runner (deepspeed.launcher.runner).
Default = None

Adds a `--comment` to the DeepSpeed launch command. In DeeperSpeed this is passed on to the SlurmLauncher as well. Sometimes necessary for cluster rules, or so I've heard.

1 change: 0 additions & 1 deletion megatron/data/helpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,6 @@ py::array build_sample_idx_int32(const py::array_t<int32_t>& sizes_,
free_when_done); // numpy array references
}


py::array build_sample_idx_int64(const py::array_t<int32_t>& sizes_,
const py::array_t<int32_t>& doc_idx_,
const int32_t seq_length,
Expand Down
2 changes: 1 addition & 1 deletion megatron/fused_kernels/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def load_fused_kernels():
print(e)
print("=" * 100)
print(
f'ERROR: Fused kernels configured but not properly installed. Please run `pip install {str(srcpath)}` to install them'
f"ERROR: Fused kernels configured but not properly installed. Please run `pip install {str(srcpath)}` to install them"
)
print("=" * 100)
exit()
Expand Down
2 changes: 1 addition & 1 deletion megatron/neox_arguments/arguments.py
Original file line number Diff line number Diff line change
Expand Up @@ -388,7 +388,7 @@ def consume_deepy_args(cls):
raise e

neox_args.wandb_group += "_" + wandb.util.generate_id()

neox_args.print()

return neox_args
Expand Down
62 changes: 40 additions & 22 deletions tools/merge_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@
import sys
import json
import argparse
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__),
os.path.pardir)))

sys.path.append(
os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir))
)

from megatron.data import indexed_dataset

Expand All @@ -20,47 +22,63 @@ def main(args):
if not os.path.isfile(os.path.join(args.input, basename)):
continue

ext_pair = '.bin' if ext == '.idx' else '.idx'
assert os.path.isfile(os.path.join(args.input, prefix) + ext_pair), \
f'ERROR: {ext_pair} file not provided for {os.path.join(args.input, prefix)}'
ext_pair = ".bin" if ext == ".idx" else ".idx"
assert os.path.isfile(
os.path.join(args.input, prefix) + ext_pair
), f"ERROR: {ext_pair} file not provided for {os.path.join(args.input, prefix)}"

prefixes.add(prefix)

builder = None
for prefix in sorted(prefixes):
if builder is None:
dataset = indexed_dataset.make_dataset(os.path.join(args.input, prefix), 'infer')
dataset = indexed_dataset.make_dataset(
os.path.join(args.input, prefix), "infer"
)

if isinstance(dataset, indexed_dataset.MMapIndexedDataset):
builder = indexed_dataset.MMapIndexedDatasetBuilder(args.output_prefix + '.bin', dtype=dataset._index.dtype)
builder = indexed_dataset.MMapIndexedDatasetBuilder(
args.output_prefix + ".bin", dtype=dataset._index.dtype
)
else:
builder = indexed_dataset.IndexedDatasetBuilder(args.output_prefix + '.bin')
builder = indexed_dataset.IndexedDatasetBuilder(
args.output_prefix + ".bin"
)

del dataset

builder.merge_file_(os.path.join(args.input, prefix))

builder.finalize(args.output_prefix + '.idx')
builder.finalize(args.output_prefix + ".idx")


if __name__ == '__main__':
if __name__ == "__main__":
parser = argparse.ArgumentParser()

group = parser.add_argument_group(title='input data')
group.add_argument('--input', type=str, required=True,
help='Path to directory containing all document files to merge')

group = parser.add_argument_group(title='output data')
group.add_argument('--output-prefix', type=str, required=True,
help='Path to binary output file without suffix')
group = parser.add_argument_group(title="input data")
group.add_argument(
"--input",
type=str,
required=True,
help="Path to directory containing all document files to merge",
)

group = parser.add_argument_group(title="output data")
group.add_argument(
"--output-prefix",
type=str,
required=True,
help="Path to binary output file without suffix",
)

args = parser.parse_args()

assert os.path.isdir(args.input), \
f'ERROR: {args.input} is not a directory or does not exist'
assert os.path.isdir(
args.input
), f"ERROR: {args.input} is not a directory or does not exist"

assert os.path.isdir(os.path.dirname(args.output_prefix)), \
f'ERROR: {os.path.dirname(args.output_prefix)} is not a directory or does not exist'
assert os.path.isdir(
os.path.dirname(args.output_prefix)
), f"ERROR: {os.path.dirname(args.output_prefix)} is not a directory or does not exist"

main(args)