Merge pull request PaddlePaddle#7109 from WenmuZhou/tipc1

convert fp16 params to fp32 when params is fp16 format
tink2123 · Aug 8, 2022 · 44852aa · 44852aa
2 parents 9e4ae9d + c6738f4
commit 44852aa
Show file tree

Hide file tree

Showing 3 changed files with 23 additions and 6 deletions.
diff --git a/ppocr/utils/save_load.py b/ppocr/utils/save_load.py
@@ -53,6 +53,7 @@ def load_model(config, model, optimizer=None, model_type='det'):
  checkpoints = global_config.get('checkpoints')
  pretrained_model = global_config.get('pretrained_model')
  best_model_dict = {}
+ is_float16 = False
 
  if model_type == 'vqa':
  # NOTE: for vqa model, resume training is not supported now
@@ -100,14 +101,20 @@ def load_model(config, model, optimizer=None, model_type='det'):
  key, params.keys()))
  continue
  pre_value = params[key]
+ if pre_value.dtype == paddle.float16:
+ pre_value = pre_value.astype(paddle.float32)
+ is_float16 = True
  if list(value.shape) == list(pre_value.shape):
  new_state_dict[key] = pre_value
  else:
  logger.warning(
  "The shape of model params {} {} not matched with loaded params shape {} !".
  format(key, value.shape, pre_value.shape))
  model.set_state_dict(new_state_dict)
-
+ if is_float16:
+ logger.info(
+ "The parameter type is float16, which is converted to float32 when loading"
+ )
  if optimizer is not None:
  if os.path.exists(checkpoints + '.pdopt'):
  optim_dict = paddle.load(checkpoints + '.pdopt')
@@ -126,9 +133,10 @@ def load_model(config, model, optimizer=None, model_type='det'):
  best_model_dict['start_epoch'] = states_dict['epoch'] + 1
  logger.info("resume from {}".format(checkpoints))
  elif pretrained_model:
- load_pretrained_params(model, pretrained_model)
+ is_float16 = load_pretrained_params(model, pretrained_model)
  else:
  logger.info('train from scratch')
+ best_model_dict['is_float16'] = is_float16
  return best_model_dict
 
 
@@ -142,19 +150,28 @@ def load_pretrained_params(model, path):
  params = paddle.load(path + '.pdparams')
  state_dict = model.state_dict()
  new_state_dict = {}
+ is_float16 = False
  for k1 in params.keys():
  if k1 not in state_dict.keys():
  logger.warning("The pretrained params {} not in model".format(k1))
  else:
+ if params[k1].dtype == paddle.float16:
+ params[k1] = params[k1].astype(paddle.float32)
+ is_float16 = True
  if list(state_dict[k1].shape) == list(params[k1].shape):
  new_state_dict[k1] = params[k1]
  else:
  logger.warning(
  "The shape of model params {} {} not matched with loaded params {} {} !".
  format(k1, state_dict[k1].shape, k1, params[k1].shape))
+
  model.set_state_dict(new_state_dict)
+ if is_float16:
+ logger.info(
+ "The parameter type is float16, which is converted to float32 when loading"
+ )
  logger.info("load pretrain successful from {}".format(path))
- return model
+ return is_float16
 
 
 def save_model(model,

diff --git a/test_tipc/configs/layoutxlm_ser/train_infer_python.txt b/test_tipc/configs/layoutxlm_ser/train_infer_python.txt
@@ -6,7 +6,7 @@ Global.use_gpu:True|True
 Global.auto_cast:fp32
 Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=17
 Global.save_model_dir:./output/
-Train.loader.batch_size_per_card:lite_train_lite_infer=8|whole_train_whole_infer=8
+Train.loader.batch_size_per_card:lite_train_lite_infer=4|whole_train_whole_infer=8
 Architecture.Backbone.checkpoints:null
 train_model_name:latest
 train_infer_img_dir:ppstructure/docs/vqa/input/zh_val_42.jpg

diff --git a/test_tipc/prepare.sh b/test_tipc/prepare.sh
@@ -108,7 +108,7 @@ if [ ${MODE} = "benchmark_train" ];then
  fi
  if [ ${model_name} == "layoutxlm_ser" ]; then
  pip install -r ppstructure/vqa/requirements.txt
- pip install paddlenlp\>=2.3.5 --force-reinstall
+ pip install paddlenlp\>=2.3.5 --force-reinstall -i https://mirrors.aliyun.com/pypi/simple/
  wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/ppstructure/dataset/XFUND.tar --no-check-certificate
  cd ./train_data/ && tar xf XFUND.tar
  # expand gt.txt 10 times
@@ -222,7 +222,7 @@ if [ ${MODE} = "lite_train_lite_infer" ];then
  fi
  if [ ${model_name} == "layoutxlm_ser" ]; then
  pip install -r ppstructure/vqa/requirements.txt
- pip install paddlenlp\>=2.3.5 --force-reinstall
+ pip install paddlenlp\>=2.3.5 --force-reinstall -i https://mirrors.aliyun.com/pypi/simple/
  wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/ppstructure/dataset/XFUND.tar --no-check-certificate
  cd ./train_data/ && tar xf XFUND.tar
  cd ../