-
Notifications
You must be signed in to change notification settings - Fork 65
/
Llama2.json
24 lines (24 loc) · 758 Bytes
/
Llama2.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
{
"model_name_or_path": "meta-llama/Llama-2-7b-chat-hf",
"dataset_name": "wikitext",
"dataset_config_name": "wikitext-103-raw-v1",
"per_device_train_batch_size": 32,
"per_device_eval_batch_size": 32,
"gradient_accumulation_steps": 1,
"do_train": true,
"do_eval": true,
"max_seq_length": 512,
"mask_token_type": "blank",
"data_collator_type": "default",
"mlm_probability": 0.2,
"overwrite_output_dir": true,
"output_dir": "output/mntp/Llama-2-7b-chat-hf",
"evaluation_strategy": "steps",
"eval_steps": 100,
"save_steps": 200,
"stop_after_n_steps": 1000,
"lora_r": 16,
"gradient_checkpointing": true,
"torch_dtype": "bfloat16",
"attn_implementation": "flash_attention_2"
}