support gemma #441

Merged · 6 commits · Feb 22, 2024
Changes from 1 commit
update sh
Jintao-Huang committed Feb 22, 2024
commit 58b1515cad0da743b7910fa0a0c5f5e3b6014882
11 changes: 11 additions & 0 deletions examples/pytorch/llm/scripts/gemma_2b_instruct/lora/infer.sh
@@ -0,0 +1,11 @@
# Experimental environment: A100
CUDA_VISIBLE_DEVICES=0 \
swift infer \
--ckpt_dir "output/gemma-2b-instruct/vx_xxx/checkpoint-xxx" \
--load_dataset_config true \
--max_length 2048 \
--max_new_tokens 2048 \
--temperature 0.1 \
--top_p 0.7 \
--repetition_penalty 1. \
--do_sample true \
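
The inference flags above pin down the sampling behaviour explicitly. As a rough illustration only (an assumption about how the parameters are usually interpreted, not something this PR implements), they correspond to the following Hugging Face generation settings:

# Hypothetical sketch: the infer.sh sampling flags expressed as a
# transformers GenerationConfig. Purely illustrative; swift's CLI does
# its own argument handling.
from transformers import GenerationConfig

generation_config = GenerationConfig(
    max_new_tokens=2048,     # --max_new_tokens 2048
    do_sample=True,          # --do_sample true
    temperature=0.1,         # --temperature 0.1 (near-greedy sampling)
    top_p=0.7,               # --top_p 0.7 (nucleus sampling cutoff)
    repetition_penalty=1.0,  # --repetition_penalty 1. (no penalty applied)
)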
31 changes: 31 additions & 0 deletions examples/pytorch/llm/scripts/gemma_2b_instruct/lora/sft.sh
@@ -0,0 +1,31 @@
# Experimental environment: V100, A10, 3090
# 12GB GPU memory

CUDA_VISIBLE_DEVICES=0 \
swift sft \
--model_id_or_path AI-ModelScope/gemma-2b-it \
--sft_type lora \
--tuner_backend swift \
--template_type AUTO \
--dtype AUTO \
--output_dir output \
--dataset hc3-zh \
--train_dataset_sample 5000 \
--num_train_epochs 1 \
--max_length 2048 \
--check_dataset_strategy warning \
--lora_rank 8 \
--lora_alpha 32 \
--lora_dropout_p 0.05 \
--lora_target_modules ALL \
--gradient_checkpointing true \
--batch_size 1 \
--weight_decay 0.01 \
--learning_rate 1e-4 \
--gradient_accumulation_steps 16 \
--max_grad_norm 0.5 \
--warmup_ratio 0.1 \
--eval_steps 100 \
--save_steps 100 \
--save_total_limit 2 \
--logging_steps 10 \
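
The script header notes a 12GB GPU memory budget; the flags get there by combining LoRA, gradient checkpointing, and a per-step batch size of 1 with 16 accumulation steps. A minimal sketch of the implied effective batch size (plain arithmetic, not swift code; the variable names are local to this example):

# Illustrative arithmetic for the sft.sh flags above.
batch_size = 1                    # --batch_size 1
gradient_accumulation_steps = 16  # --gradient_accumulation_steps 16
num_gpus = 1                      # CUDA_VISIBLE_DEVICES=0

effective_batch_size = batch_size * gradient_accumulation_steps * num_gpus
print(effective_batch_size)  # 16 samples per optimizer step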
5 changes: 5 additions & 0 deletions swift/trainers/mixin.py
@@ -594,6 +594,11 @@ def _maybe_log_save_evaluate(self, tr_loss, *args, **kwargs):
            logs[k] = round(
                v_scalar /
                (self.state.global_step - self._globalstep_last_logged), 8)
        if version.parse(
                transformers.__version__) >= version.parse('4.38'):
            grad_norm = args[0]
            if grad_norm is not None:
                logs['grad_norm'] = grad_norm
        logs['learning_rate'] = self._get_learning_rate()

        tr_loss -= tr_loss
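
The mixin.py change gates the new grad_norm log entry behind a transformers version check, since _maybe_log_save_evaluate only receives grad_norm as an extra positional argument from transformers 4.38 onwards. A self-contained sketch of the same pattern (a standalone illustration, not the trainer code itself; the grad_norm value is a placeholder):

# Standalone sketch of version-gated grad_norm logging, assuming
# `packaging` and `transformers` are installed.
import transformers
from packaging import version

def build_logs(grad_norm=None, learning_rate=1e-4):
    logs = {}
    # Only transformers >= 4.38 hands grad_norm to _maybe_log_save_evaluate,
    # so older versions simply skip the entry.
    if version.parse(transformers.__version__) >= version.parse('4.38'):
        if grad_norm is not None:
            logs['grad_norm'] = float(grad_norm)  # placeholder value below
    logs['learning_rate'] = learning_rate
    return logs

print(build_logs(grad_norm=0.42))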