diff --git a/.gitignore b/.gitignore
index 796b884e5..bb8976dab 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,6 @@
 ## Data
 data/
+logs/
 
 ## Python
 __pycache__/
diff --git a/configs/base_deepspeed.json b/configs/base_deepspeed.json
index 00f3fcdb9..28f88ebaf 100644
--- a/configs/base_deepspeed.json
+++ b/configs/base_deepspeed.json
@@ -1,6 +1,11 @@
 {
   "train_batch_size": 8,
   "gradient_accumulation_steps": 1,
+  "tensorboard": {
+    "enabled": true,
+    "output_path": "./logs",
+    "job_name": "gptneox"
+  },
   "optimizer": {
     "type": "Adam",
     "params": {
diff --git a/data/enwik8.gz b/data/enwik8.gz
deleted file mode 100644
index 7a8ec66cd..000000000
Binary files a/data/enwik8.gz and /dev/null differ
diff --git a/train.sh b/train.sh
index 1eb865674..7028d8045 100644
--- a/train.sh
+++ b/train.sh
@@ -1 +1,3 @@
+# Ensure the TensorBoard output dir ("output_path" in configs/base_deepspeed.json) exists; -p keeps reruns from failing when it already does.
+mkdir -p logs
 NCCL_SHM_DISABLE=1 NCCL_DEBUG=info MASTER_ADDR=127.0.0.1 MASTER_PORT=2000 deepspeed train_enwik8.py --deepspeed --deepspeed_config configs/base_deepspeed.json