-
Notifications
You must be signed in to change notification settings - Fork 1k
/
eleutherai_cluster.yml
29 lines (24 loc) · 1.1 KB
/
eleutherai_cluster.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# Data paths and options for running on the EleutherAI cluster
{
# you may include multiple distinct datasets if desired
"train_data_paths": ["/mnt/ssd-1/data/enwik8/enwik8_text_document"],
"valid_data_paths": ["/mnt/ssd-1/data/enwik8/enwik8_val_text_document"],
"test_data_paths": ["/mnt/ssd-1/data/enwik8/enwik8_test_text_document"],
# If using multiple datasets, provide one sampling weight per dataset path (in the same order as the paths)
# "train_data_weights": [1., 2.],
# "test_data_weights": [2., 1.],
# "valid_data_weights": [0.5, 0.4],
# If you would like the code to create the val and test datasets from your training set, use the following
# instead of the train/valid/test data paths above.
# "split" determines the relative sizes of train, val, and test (e.g. 995 : 4 : 1)
# "split": "995,4,1",
# "data_path": "/mnt/ssd-1/data/enwik8/enwik8_text_document",
"vocab_file": "/mnt/ssd-1/data/gpt2-vocab.json",
"merge_file": "/mnt/ssd-1/data/gpt2-merges.txt",
"save": "/mnt/ssd-1/checkpoints",
"load": "/mnt/ssd-1/checkpoints",
"tensorboard_dir": "/mnt/ssd-1/tensorboard",
"log_dir": "/mnt/ssd-1/logs",
"wandb_team": "eleutherai",
"wandb_project": "neox",
"wandb_group": "example"
}