Skip to content

Commit

Permalink
Merge pull request PaddlePaddle#7109 from WenmuZhou/tipc1
Browse files (browse the repository at this point in the history)
Convert fp16 params to fp32 when the loaded params are in fp16 format
  • Loading branch information
andyjiang1116 committed Aug 8, 2022
2 parents 9e4ae9d + c6738f4 commit 44852aa
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 6 deletions.
23 changes: 20 additions & 3 deletions ppocr/utils/save_load.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ def load_model(config, model, optimizer=None, model_type='det'):
checkpoints = global_config.get('checkpoints')
pretrained_model = global_config.get('pretrained_model')
best_model_dict = {}
is_float16 = False

if model_type == 'vqa':
# NOTE: for vqa model, resume training is not supported now
Expand Down Expand Up @@ -100,14 +101,20 @@ def load_model(config, model, optimizer=None, model_type='det'):
key, params.keys()))
continue
pre_value = params[key]
if pre_value.dtype == paddle.float16:
pre_value = pre_value.astype(paddle.float32)
is_float16 = True
if list(value.shape) == list(pre_value.shape):
new_state_dict[key] = pre_value
else:
logger.warning(
"The shape of model params {} {} not matched with loaded params shape {} !".
format(key, value.shape, pre_value.shape))
model.set_state_dict(new_state_dict)

if is_float16:
logger.info(
"The parameter type is float16, which is converted to float32 when loading"
)
if optimizer is not None:
if os.path.exists(checkpoints + '.pdopt'):
optim_dict = paddle.load(checkpoints + '.pdopt')
Expand All @@ -126,9 +133,10 @@ def load_model(config, model, optimizer=None, model_type='det'):
best_model_dict['start_epoch'] = states_dict['epoch'] + 1
logger.info("resume from {}".format(checkpoints))
elif pretrained_model:
load_pretrained_params(model, pretrained_model)
is_float16 = load_pretrained_params(model, pretrained_model)
else:
logger.info('train from scratch')
best_model_dict['is_float16'] = is_float16
return best_model_dict


Expand All @@ -142,19 +150,28 @@ def load_pretrained_params(model, path):
params = paddle.load(path + '.pdparams')
state_dict = model.state_dict()
new_state_dict = {}
is_float16 = False
for k1 in params.keys():
if k1 not in state_dict.keys():
logger.warning("The pretrained params {} not in model".format(k1))
else:
if params[k1].dtype == paddle.float16:
params[k1] = params[k1].astype(paddle.float32)
is_float16 = True
if list(state_dict[k1].shape) == list(params[k1].shape):
new_state_dict[k1] = params[k1]
else:
logger.warning(
"The shape of model params {} {} not matched with loaded params {} {} !".
format(k1, state_dict[k1].shape, k1, params[k1].shape))

model.set_state_dict(new_state_dict)
if is_float16:
logger.info(
"The parameter type is float16, which is converted to float32 when loading"
)
logger.info("load pretrain successful from {}".format(path))
return model
return is_float16


def save_model(model,
Expand Down
2 changes: 1 addition & 1 deletion test_tipc/configs/layoutxlm_ser/train_infer_python.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Global.use_gpu:True|True
Global.auto_cast:fp32
Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=17
Global.save_model_dir:./output/
Train.loader.batch_size_per_card:lite_train_lite_infer=8|whole_train_whole_infer=8
Train.loader.batch_size_per_card:lite_train_lite_infer=4|whole_train_whole_infer=8
Architecture.Backbone.checkpoints:null
train_model_name:latest
train_infer_img_dir:ppstructure/docs/vqa/input/zh_val_42.jpg
Expand Down
4 changes: 2 additions & 2 deletions test_tipc/prepare.sh
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ if [ ${MODE} = "benchmark_train" ];then
fi
if [ ${model_name} == "layoutxlm_ser" ]; then
pip install -r ppstructure/vqa/requirements.txt
pip install paddlenlp\>=2.3.5 --force-reinstall
pip install paddlenlp\>=2.3.5 --force-reinstall -i https://mirrors.aliyun.com/pypi/simple/
wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/ppstructure/dataset/XFUND.tar --no-check-certificate
cd ./train_data/ && tar xf XFUND.tar
# expand gt.txt 10 times
Expand Down Expand Up @@ -222,7 +222,7 @@ if [ ${MODE} = "lite_train_lite_infer" ];then
fi
if [ ${model_name} == "layoutxlm_ser" ]; then
pip install -r ppstructure/vqa/requirements.txt
pip install paddlenlp\>=2.3.5 --force-reinstall
pip install paddlenlp\>=2.3.5 --force-reinstall -i https://mirrors.aliyun.com/pypi/simple/
wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/ppstructure/dataset/XFUND.tar --no-check-certificate
cd ./train_data/ && tar xf XFUND.tar
cd ../
Expand Down

0 comments on commit 44852aa

Please sign in to comment.