Commit 03b8d9e: Feat 1121 (#165)
Jintao-Huang committed Nov 21, 2023
1 parent: a95da87
Showing 86 changed files with 279 additions and 265 deletions.

README.md (20 additions, 3 deletions)

@@ -141,7 +141,7 @@ sft_args = SftArguments(
    dataset=[DatasetName.blossom_math_zh],
    output_dir='output',
    gradient_checkpointing=True)
-best_ckpt_dir = sft_main(sft_args)
+best_ckpt_dir = sft_main(sft_args)['best_model_checkpoint']
print(f'best_ckpt_dir: {best_ckpt_dir}')
torch.cuda.empty_cache()
infer_args = InferArguments(
@@ -159,7 +159,11 @@ web_ui_main(infer_args)
```bash
# Experimental environment: A10, 3090, A100, ...
# 20GB GPU memory
-CUDA_VISIBLE_DEVICES=0 swift sft --model_id_or_path qwen/Qwen-7B-Chat --dataset blossom-math-zh
+CUDA_VISIBLE_DEVICES=0 \
+swift sft \
+   --model_id_or_path qwen/Qwen-7B-Chat \
+   --dataset blossom-math-zh \
+   --output_dir output \

# Using DDP
# Experimental environment: 2 * 3090
@@ -169,18 +173,31 @@ NPROC_PER_NODE=2 \
swift sft \
    --model_id_or_path qwen/Qwen-7B-Chat \
    --dataset blossom-math-zh \
+   --output_dir output \

# Using custom dataset
-CUDA_VISIBLE_DEVICES=0 swift sft --model_id_or_path qwen/Qwen-7B-Chat --custom_train_dataset_path chatml.jsonl
+CUDA_VISIBLE_DEVICES=0 \
+swift sft \
+   --model_id_or_path qwen/Qwen-7B-Chat \
+   --custom_train_dataset_path chatml.jsonl \
+   --output_dir output \
```
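For reference, `--custom_train_dataset_path` points at a JSONL file. A minimal sketch of producing one, assuming a single-turn `query`/`response` schema; the exact keys are an assumption, not something this diff confirms:

```python
import json

# Hypothetical records; verify the expected keys against the swift docs.
rows = [
    {'query': 'What is 13 + 24?', 'response': '13 + 24 = 37.'},
    {'query': 'Factor 36.', 'response': '36 = 2^2 * 3^2.'},
]
with open('chatml.jsonl', 'w', encoding='utf-8') as f:
    for row in rows:
        f.write(json.dumps(row, ensure_ascii=False) + '\n')
```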

**Inference**:
```bash
# Original Model
CUDA_VISIBLE_DEVICES=0 swift infer --model_id_or_path qwen/Qwen-7B-Chat --dataset blossom-math-zh

# Fine-tuned Model
CUDA_VISIBLE_DEVICES=0 swift infer --ckpt_dir 'xxx/vx_xxx/checkpoint-xxx'
```

**Web-UI**:
```bash
# Original Model
CUDA_VISIBLE_DEVICES=0 swift web-ui --model_id_or_path qwen/Qwen-7B-Chat

# Fine-tuned Model
CUDA_VISIBLE_DEVICES=0 swift web-ui --ckpt_dir 'xxx/vx_xxx/checkpoint-xxx'
```
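The CLI entry points above mirror the Python API from the quick-start snippet. A minimal programmatic sketch, assuming `InferArguments` is importable from `swift.llm` as in the repository's examples; the checkpoint path is the same placeholder used in the commands above:

```python
from swift.llm import InferArguments
from swift.llm.run import infer_main

# Placeholder path, as in the CLI examples; point it at a real
# checkpoint directory produced by `swift sft`.
args = InferArguments(ckpt_dir='xxx/vx_xxx/checkpoint-xxx')
result = infer_main(args)
```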

README_CN.md (21 additions, 4 deletions)

@@ -138,7 +138,7 @@ sft_args = SftArguments(
    dataset=[DatasetName.blossom_math_zh],
    output_dir='output',
    gradient_checkpointing=True)
-best_ckpt_dir = sft_main(sft_args)
+best_ckpt_dir = sft_main(sft_args)['best_model_checkpoint']
print(f'best_ckpt_dir: {best_ckpt_dir}')
torch.cuda.empty_cache()
infer_args = InferArguments(
@@ -156,7 +156,11 @@ web_ui_main(infer_args)
```bash
# Experimental environment: A10, 3090, A100, ...
# 20GB GPU memory
-CUDA_VISIBLE_DEVICES=0 swift sft --model_id_or_path qwen/Qwen-7B-Chat --dataset blossom-math-zh
+CUDA_VISIBLE_DEVICES=0 \
+swift sft \
+   --model_id_or_path qwen/Qwen-7B-Chat \
+   --dataset blossom-math-zh \
+   --output_dir output \

# Using DDP
# Experimental environment: 2 * 3090
@@ -166,18 +170,31 @@ NPROC_PER_NODE=2 \
swift sft \
    --model_id_or_path qwen/Qwen-7B-Chat \
    --dataset blossom-math-zh \
+   --output_dir output \

# Using custom dataset
-CUDA_VISIBLE_DEVICES=0 swift sft --model_id_or_path qwen/Qwen-7B-Chat --custom_train_dataset_path chatml.jsonl
+CUDA_VISIBLE_DEVICES=0 \
+swift sft \
+   --model_id_or_path qwen/Qwen-7B-Chat \
+   --custom_train_dataset_path chatml.jsonl \
+   --output_dir output \
```

**Inference**:
```bash
# Original Model
CUDA_VISIBLE_DEVICES=0 swift infer --model_id_or_path qwen/Qwen-7B-Chat --dataset blossom-math-zh

# Fine-tuned Model
CUDA_VISIBLE_DEVICES=0 swift infer --ckpt_dir 'xxx/vx_xxx/checkpoint-xxx'
```

- **Web-UI**
+ **Web-UI**:
```bash
# Original Model
CUDA_VISIBLE_DEVICES=0 swift web-ui --model_id_or_path qwen/Qwen-7B-Chat

# Fine-tuned Model
CUDA_VISIBLE_DEVICES=0 swift web-ui --ckpt_dir 'xxx/vx_xxx/checkpoint-xxx'
```

examples/pytorch/llm/README.md (24 additions, 7 deletions)

@@ -104,7 +104,7 @@ sft_args = SftArguments(
    dataset=[DatasetName.blossom_math_zh],
    output_dir='output',
    gradient_checkpointing=True)
-best_ckpt_dir = sft_main(sft_args)
+best_ckpt_dir = sft_main(sft_args)['best_model_checkpoint']
print(f'best_ckpt_dir: {best_ckpt_dir}')
torch.cuda.empty_cache()
infer_args = InferArguments(
@@ -122,7 +122,11 @@ web_ui_main(infer_args)
```bash
# Experimental environment: A10, 3090, A100, ...
# 20GB GPU memory
-CUDA_VISIBLE_DEVICES=0 swift sft --model_id_or_path qwen/Qwen-7B-Chat --dataset blossom-math-zh
+CUDA_VISIBLE_DEVICES=0 \
+swift sft \
+   --model_id_or_path qwen/Qwen-7B-Chat \
+   --dataset blossom-math-zh \
+   --output_dir output \

# Using DDP
# Experimental environment: 2 * 3090
@@ -132,18 +136,31 @@ NPROC_PER_NODE=2 \
swift sft \
    --model_id_or_path qwen/Qwen-7B-Chat \
    --dataset blossom-math-zh \
+   --output_dir output \

# Using custom dataset
-CUDA_VISIBLE_DEVICES=0 swift sft --model_id_or_path qwen/Qwen-7B-Chat --custom_train_dataset_path chatml.jsonl
+CUDA_VISIBLE_DEVICES=0 \
+swift sft \
+   --model_id_or_path qwen/Qwen-7B-Chat \
+   --custom_train_dataset_path chatml.jsonl \
+   --output_dir output \
```

**Inference**:
```bash
# Original Model
CUDA_VISIBLE_DEVICES=0 swift infer --model_id_or_path qwen/Qwen-7B-Chat --dataset blossom-math-zh

# Fine-tuned Model
CUDA_VISIBLE_DEVICES=0 swift infer --ckpt_dir 'xxx/vx_xxx/checkpoint-xxx'
```

**Web-UI**:
```bash
# Original Model
CUDA_VISIBLE_DEVICES=0 swift web-ui --model_id_or_path qwen/Qwen-7B-Chat

# Fine-tuned Model
CUDA_VISIBLE_DEVICES=0 swift web-ui --ckpt_dir 'xxx/vx_xxx/checkpoint-xxx'
```

@@ -574,9 +591,9 @@ The template initialization function retrieves the complete chat template based
- `--check_model_is_latest`: Check whether the model is the latest, default is `True`. If you need to train without an internet connection, set this parameter to `False`.
- `--max_new_tokens`: The maximum number of new tokens to generate. The default value is `2048`. This parameter only takes effect when `predict_with_generate` is set to True.
- `--do_sample`: Whether to use sampling during generation. The default value is `True`. This parameter only takes effect when `predict_with_generate` is set to True.
- - `--temperature`: The temperature value for sampling during generation. The default value is `0.9`. This parameter only takes effect when `predict_with_generate` is set to True.
+ - `--temperature`: The temperature value for sampling during generation. The default value is `0.3`. This parameter only takes effect when `predict_with_generate` is set to True.
- `--top_k`: The value of k for top-k sampling during generation. The default value is `20`. This parameter only takes effect when `predict_with_generate` is set to True.
- - `--top_p`: The cumulative probability threshold for top-p sampling during generation. The default value is `0.9`. This parameter only takes effect when `predict_with_generate` is set to True.
+ - `--top_p`: The cumulative probability threshold for top-p sampling during generation. The default value is `0.7`. This parameter only takes effect when `predict_with_generate` is set to True.
- `--repetition_penalty`: The repetition penalty applied during generation. The default value is `1.05`. This parameter only takes effect when `predict_with_generate` is set to True.
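The net effect of this hunk is a noticeably more conservative sampling setup (temperature 0.9 to 0.3, top_p 0.9 to 0.7, top_k unchanged at 20). As a rough sketch of the equivalent Hugging Face `GenerationConfig`, assuming swift forwards these values unchanged, which this diff does not itself confirm:

```python
from transformers import GenerationConfig

# Mirrors the new defaults listed above; whether swift constructs its
# config exactly this way is an assumption.
generation_config = GenerationConfig(
    max_new_tokens=2048,
    do_sample=True,
    temperature=0.3,
    top_k=20,
    top_p=0.7,
    repetition_penalty=1.05,
)
```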


@@ -606,9 +623,9 @@ The template initialization function retrieves the complete chat template based
- `--bnb_4bit_use_double_quant`: Default value is `True`. For specific parameter details, please refer to the `sft.sh Command Line Arguments`. This parameter is not effective if `quantization_bit` is set to 0.
- `--max_new_tokens`: Maximum number of new tokens to generate. Default value is `2048`.
- `--do_sample`: Whether to use greedy decoding or sampling for generation. Default value is `True`.
- - `--temperature`: Default value is `0.9`. This parameter only takes effect when `do_sample` is set to True.
+ - `--temperature`: Default value is `0.3`. This parameter only takes effect when `do_sample` is set to True.
- `--top_k`: Default value is `20`. This parameter only takes effect when `do_sample` is set to True.
- - `--top_p`: Default value is `0.9`. This parameter only takes effect when `do_sample` is set to True.
+ - `--top_p`: Default value is `0.7`. This parameter only takes effect when `do_sample` is set to True.
- `--repetition_penalty`: Default value is `1.05`.
- `--use_flash_attn`: Default value is `None`, which means 'auto'. For specific parameter details, please refer to the `sft.sh Command Line Arguments`. The models that support 'flash_attn' include: qwen series, qwen-vl series, llama series, openbuddy series, mistral series, yi series, ziya series.
- `--ignore_args_error`: Default value is `False`. For specific parameter details, please refer to the `sft.sh Command Line Arguments`.
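A minimal sketch pulling these inference options together; the field names are assumed to mirror the CLI flags one-to-one, which the repository's examples suggest but this diff does not confirm:

```python
from swift.llm import InferArguments, ModelType

# quantization_bit=4 is what activates the bnb_4bit_* options above;
# the generation values simply restate the new defaults.
args = InferArguments(
    model_type=ModelType.qwen_7b_chat,
    quantization_bit=4,
    max_new_tokens=2048,
    do_sample=True,
    temperature=0.3,
    top_k=20,
    top_p=0.7,
    repetition_penalty=1.05)
```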
examples/pytorch/llm/README_CN.md (25 additions, 8 deletions)

@@ -103,7 +103,7 @@ sft_args = SftArguments(
    dataset=[DatasetName.blossom_math_zh],
    output_dir='output',
    gradient_checkpointing=True)
-best_ckpt_dir = sft_main(sft_args)
+best_ckpt_dir = sft_main(sft_args)['best_model_checkpoint']
print(f'best_ckpt_dir: {best_ckpt_dir}')
torch.cuda.empty_cache()
infer_args = InferArguments(
@@ -121,7 +121,11 @@ web_ui_main(infer_args)
```bash
# Experimental environment: A10, 3090, A100, ...
# 20GB GPU memory
-CUDA_VISIBLE_DEVICES=0 swift sft --model_id_or_path qwen/Qwen-7B-Chat --dataset blossom-math-zh
+CUDA_VISIBLE_DEVICES=0 \
+swift sft \
+   --model_id_or_path qwen/Qwen-7B-Chat \
+   --dataset blossom-math-zh \
+   --output_dir output \

# Using DDP
# Experimental environment: 2 * 3090
@@ -131,18 +135,31 @@ NPROC_PER_NODE=2 \
swift sft \
    --model_id_or_path qwen/Qwen-7B-Chat \
    --dataset blossom-math-zh \
+   --output_dir output \

# Using custom dataset
-CUDA_VISIBLE_DEVICES=0 swift sft --model_id_or_path qwen/Qwen-7B-Chat --custom_train_dataset_path chatml.jsonl
+CUDA_VISIBLE_DEVICES=0 \
+swift sft \
+   --model_id_or_path qwen/Qwen-7B-Chat \
+   --custom_train_dataset_path chatml.jsonl \
+   --output_dir output \
```

**Inference**:
```bash
# Original Model
CUDA_VISIBLE_DEVICES=0 swift infer --model_id_or_path qwen/Qwen-7B-Chat --dataset blossom-math-zh

# Fine-tuned Model
CUDA_VISIBLE_DEVICES=0 swift infer --ckpt_dir 'xxx/vx_xxx/checkpoint-xxx'
```

- **Web-UI**
+ **Web-UI**:
```bash
# Original Model
CUDA_VISIBLE_DEVICES=0 swift web-ui --model_id_or_path qwen/Qwen-7B-Chat

# Fine-tuned Model
CUDA_VISIBLE_DEVICES=0 swift web-ui --ckpt_dir 'xxx/vx_xxx/checkpoint-xxx'
```

@@ -577,9 +594,9 @@ if __name__ == '__main__':
- `--check_model_is_latest`: Check whether the model is the latest, default is `True`. If you need to train without an internet connection, set this parameter to `False`.
- `--max_new_tokens`: Default is `2048`. This parameter only takes effect when `predict_with_generate` is set to True.
- `--do_sample`: Default is `True`. This parameter only takes effect when `predict_with_generate` is set to True.
- - `--temperature`: Default is `0.9`. This parameter only takes effect when `predict_with_generate` is set to True.
+ - `--temperature`: Default is `0.3`. This parameter only takes effect when `predict_with_generate` is set to True.
- `--top_k`: Default is `20`. This parameter only takes effect when `predict_with_generate` is set to True.
- - `--top_p`: Default is `0.9`. This parameter only takes effect when `predict_with_generate` is set to True.
+ - `--top_p`: Default is `0.7`. This parameter only takes effect when `predict_with_generate` is set to True.
- `--repetition_penalty`: Default is `1.05`. This parameter only takes effect when `predict_with_generate` is set to True.


@@ -609,9 +626,9 @@ if __name__ == '__main__':
- `--bnb_4bit_use_double_quant`: Default is `True`. See `sft.sh Command Line Arguments` for details. This parameter has no effect if `quantization_bit` is set to 0.
- `--max_new_tokens`: Maximum number of new tokens to generate, default is `2048`.
- `--do_sample`: Whether to use greedy decoding or sampling for generation, default is `True`.
- - `--temperature`: Default is `0.9`. This parameter only takes effect when `do_sample` is set to True.
+ - `--temperature`: Default is `0.3`. This parameter only takes effect when `do_sample` is set to True.
- `--top_k`: Default is `20`. This parameter only takes effect when `do_sample` is set to True.
- - `--top_p`: Default is `0.9`. This parameter only takes effect when `do_sample` is set to True.
+ - `--top_p`: Default is `0.7`. This parameter only takes effect when `do_sample` is set to True.
- `--repetition_penalty`: Default is `1.05`.
- `--use_flash_attn`: Default is `None`, i.e. 'auto'. See `sft.sh Command Line Arguments` for details.
- `--ignore_args_error`: Default is `False`. See `sft.sh Command Line Arguments` for details.
examples/pytorch/llm/app.py (1 addition, 1 deletion)

@@ -12,5 +12,5 @@
# or chat
args = InferArguments(model_type=ModelType.qwen_7b_chat_int4)
# or load from ckpt dir
-# args = InferArguments(ckpt_dir='xxx/vx_xxx/checkpoint-xxx', load_args_from_ckpt_dir=True)
+# args = InferArguments(ckpt_dir='xxx/vx_xxx/checkpoint-xxx')
web_ui_main(args)
examples/pytorch/llm/llm_infer.py (2 additions, 1 deletion)

@@ -4,4 +4,5 @@
from swift.llm.run import infer_main

if __name__ == '__main__':
-    infer_main()
+    result = infer_main()
+    print(f'infer_main result: {result}')
examples/pytorch/llm/llm_sft.py (2 additions, 2 deletions)

@@ -4,5 +4,5 @@
from swift.llm.run import sft_main

if __name__ == '__main__':
-    best_ckpt_dir = sft_main()
-    print(f'best_ckpt_dir: {best_ckpt_dir}')
+    output = sft_main()
+    print(f'sft_main output: {output}')
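This is the core API change of the commit: `sft_main` now returns a mapping that carries the best checkpoint path instead of returning the path directly, which is why the README hunks above index the result with `best_model_checkpoint`. A minimal end-to-end sketch of the updated pattern; import paths and field names follow the repository's examples and should be treated as assumptions:

```python
from swift.llm import DatasetName, InferArguments, SftArguments
from swift.llm.run import infer_main, sft_main

sft_args = SftArguments(
    model_id_or_path='qwen/Qwen-7B-Chat',
    dataset=[DatasetName.blossom_math_zh],
    output_dir='output',
    gradient_checkpointing=True)
output = sft_main(sft_args)
best_ckpt_dir = output['best_model_checkpoint']  # new dict-style return
print(f'best_ckpt_dir: {best_ckpt_dir}')

# Chain straight into inference on the best checkpoint.
infer_args = InferArguments(ckpt_dir=best_ckpt_dir)
result = infer_main(infer_args)
```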

@@ -8,9 +8,8 @@ python llm_infer.py \
    --eval_human false \
    --max_length 4096 \
    --max_new_tokens 2048 \
-   --temperature 0.9 \
-   --top_k 20 \
-   --top_p 0.9 \
+   --temperature 0.1 \
+   --top_p 0.7 \
    --repetition_penalty 1.05 \
    --do_sample true \
    --merge_lora_and_save false \

@@ -8,9 +8,8 @@ python llm_infer.py \
    --eval_human false \
    --max_length 2048 \
    --max_new_tokens 2048 \
-   --temperature 0.9 \
-   --top_k 20 \
-   --top_p 0.9 \
+   --temperature 0.1 \
+   --top_p 0.7 \
    --repetition_penalty 1.05 \
    --do_sample true \
    --merge_lora_and_save false \

@@ -7,9 +7,8 @@ python llm_infer.py \
    --eval_human false \
    --max_length 4096 \
    --max_new_tokens 2048 \
-   --temperature 0.9 \
-   --top_k 20 \
-   --top_p 0.9 \
+   --temperature 0.1 \
+   --top_p 0.7 \
    --repetition_penalty 1.05 \
    --do_sample true \
    --merge_lora_and_save false \

@@ -7,9 +7,8 @@ python llm_infer.py \
    --eval_human false \
    --max_length 4096 \
    --max_new_tokens 2048 \
-   --temperature 0.9 \
-   --top_k 20 \
-   --top_p 0.9 \
+   --temperature 0.1 \
+   --top_p 0.7 \
    --repetition_penalty 1.05 \
    --do_sample true \
    --merge_lora_and_save false \
examples/pytorch/llm/scripts/baichuan2_7b/qlora/infer.sh (2 additions, 3 deletions)

@@ -7,9 +7,8 @@ python llm_infer.py \
    --eval_human false \
    --max_length 2048 \
    --max_new_tokens 2048 \
-   --temperature 0.9 \
-   --top_k 20 \
-   --top_p 0.9 \
+   --temperature 0.7 \
+   --top_p 0.7 \
    --repetition_penalty 1.05 \
    --do_sample true \
    --merge_lora_and_save false \

@@ -7,9 +7,8 @@ python llm_infer.py \
    --eval_human false \
    --max_length 4096 \
    --max_new_tokens 2048 \
-   --temperature 0.9 \
-   --top_k 20 \
-   --top_p 0.9 \
+   --temperature 0.1 \
+   --top_p 0.7 \
    --repetition_penalty 1.05 \
    --do_sample true \
    --merge_lora_and_save false \

@@ -7,9 +7,8 @@ python llm_infer.py \
    --eval_human false \
    --max_length 4096 \
    --max_new_tokens 2048 \
-   --temperature 0.9 \
-   --top_k 20 \
-   --top_p 0.9 \
+   --temperature 0.1 \
+   --top_p 0.7 \
    --repetition_penalty 1.05 \
    --do_sample true \
    --merge_lora_and_save false \

@@ -7,9 +7,8 @@ python llm_infer.py \
    --eval_human false \
    --max_length 4096 \
    --max_new_tokens 2048 \
-   --temperature 0.9 \
-   --top_k 20 \
-   --top_p 0.9 \
+   --temperature 0.1 \
+   --top_p 0.7 \
    --repetition_penalty 1.05 \
    --do_sample true \
    --merge_lora_and_save false \

@@ -7,9 +7,8 @@ python llm_infer.py \
    --eval_human false \
    --max_length 4096 \
    --max_new_tokens 2048 \
-   --temperature 0.9 \
-   --top_k 20 \
-   --top_p 0.9 \
+   --temperature 0.1 \
+   --top_p 0.7 \
    --repetition_penalty 1.05 \
    --do_sample true \
    --merge_lora_and_save false \